1 //===- MipsSEISelLowering.cpp - MipsSE DAG Lowering Interface -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Subclass of MipsTargetLowering specialized for mips32/64. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MipsSEISelLowering.h" 14 #include "MipsMachineFunction.h" 15 #include "MipsRegisterInfo.h" 16 #include "MipsSubtarget.h" 17 #include "llvm/ADT/APInt.h" 18 #include "llvm/ADT/STLExtras.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/CodeGen/CallingConvLower.h" 21 #include "llvm/CodeGen/ISDOpcodes.h" 22 #include "llvm/CodeGen/MachineBasicBlock.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineInstr.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineMemOperand.h" 27 #include "llvm/CodeGen/MachineRegisterInfo.h" 28 #include "llvm/CodeGen/SelectionDAG.h" 29 #include "llvm/CodeGen/SelectionDAGNodes.h" 30 #include "llvm/CodeGen/TargetInstrInfo.h" 31 #include "llvm/CodeGen/TargetSubtargetInfo.h" 32 #include "llvm/CodeGen/ValueTypes.h" 33 #include "llvm/CodeGenTypes/MachineValueType.h" 34 #include "llvm/IR/DebugLoc.h" 35 #include "llvm/IR/Intrinsics.h" 36 #include "llvm/IR/IntrinsicsMips.h" 37 #include "llvm/Support/Casting.h" 38 #include "llvm/Support/CommandLine.h" 39 #include "llvm/Support/Debug.h" 40 #include "llvm/Support/ErrorHandling.h" 41 #include "llvm/Support/raw_ostream.h" 42 #include "llvm/TargetParser/Triple.h" 43 #include <algorithm> 44 #include <cassert> 45 #include <cstddef> 46 #include <cstdint> 47 #include <iterator> 48 #include <utility> 49 50 using namespace llvm; 51 52 #define DEBUG_TYPE "mips-isel" 53 54 static cl::opt<bool> 55 
UseMipsTailCalls("mips-tail-calls", cl::Hidden, 56 cl::desc("MIPS: permit tail calls."), cl::init(false)); 57 58 static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false), 59 cl::desc("Expand double precision loads and " 60 "stores to their single precision " 61 "counterparts")); 62 63 // Widen the v2 vectors to the register width, i.e. v2i16 -> v8i16, 64 // v2i32 -> v4i32, etc, to ensure the correct rail size is used, i.e. 65 // INST.h for v16, INST.w for v32, INST.d for v64. 66 TargetLoweringBase::LegalizeTypeAction 67 MipsSETargetLowering::getPreferredVectorAction(MVT VT) const { 68 if (this->Subtarget.hasMSA()) { 69 switch (VT.SimpleTy) { 70 // Leave v2i1 vectors to be promoted to larger ones. 71 // Other i1 types will be promoted by default. 72 case MVT::v2i1: 73 return TypePromoteInteger; 74 break; 75 // 16-bit vector types (v2 and longer) 76 case MVT::v2i8: 77 // 32-bit vector types (v2 and longer) 78 case MVT::v2i16: 79 case MVT::v4i8: 80 // 64-bit vector types (v2 and longer) 81 case MVT::v2i32: 82 case MVT::v4i16: 83 case MVT::v8i8: 84 return TypeWidenVector; 85 break; 86 // Only word (.w) and doubleword (.d) are available for floating point 87 // vectors. That means floating point vectors should be either v2f64 88 // or v4f32. 89 // Here we only explicitly widen the f32 types - f16 will be promoted 90 // by default. 91 case MVT::v2f32: 92 case MVT::v3f32: 93 return TypeWidenVector; 94 // v2i64 is already 128-bit wide. 95 default: 96 break; 97 } 98 } 99 return TargetLoweringBase::getPreferredVectorAction(VT); 100 } 101 102 MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM, 103 const MipsSubtarget &STI) 104 : MipsTargetLowering(TM, STI) { 105 // Set up the register classes 106 addRegisterClass(MVT::i32, &Mips::GPR32RegClass); 107 108 if (Subtarget.isGP64bit()) 109 addRegisterClass(MVT::i64, &Mips::GPR64RegClass); 110 111 if (Subtarget.hasDSP() || Subtarget.hasMSA()) { 112 // Expand all truncating stores and extending loads. 
113 for (MVT VT0 : MVT::fixedlen_vector_valuetypes()) { 114 for (MVT VT1 : MVT::fixedlen_vector_valuetypes()) { 115 setTruncStoreAction(VT0, VT1, Expand); 116 setLoadExtAction(ISD::SEXTLOAD, VT0, VT1, Expand); 117 setLoadExtAction(ISD::ZEXTLOAD, VT0, VT1, Expand); 118 setLoadExtAction(ISD::EXTLOAD, VT0, VT1, Expand); 119 } 120 } 121 } 122 123 if (Subtarget.hasDSP()) { 124 MVT::SimpleValueType VecTys[2] = {MVT::v2i16, MVT::v4i8}; 125 126 for (const auto &VecTy : VecTys) { 127 addRegisterClass(VecTy, &Mips::DSPRRegClass); 128 129 // Expand all builtin opcodes. 130 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 131 setOperationAction(Opc, VecTy, Expand); 132 133 setOperationAction(ISD::ADD, VecTy, Legal); 134 setOperationAction(ISD::SUB, VecTy, Legal); 135 setOperationAction(ISD::LOAD, VecTy, Legal); 136 setOperationAction(ISD::STORE, VecTy, Legal); 137 setOperationAction(ISD::BITCAST, VecTy, Legal); 138 } 139 140 setTargetDAGCombine( 141 {ISD::SHL, ISD::SRA, ISD::SRL, ISD::SETCC, ISD::VSELECT}); 142 143 if (Subtarget.hasMips32r2()) { 144 setOperationAction(ISD::ADDC, MVT::i32, Legal); 145 setOperationAction(ISD::ADDE, MVT::i32, Legal); 146 } 147 } 148 149 if (Subtarget.hasDSPR2()) 150 setOperationAction(ISD::MUL, MVT::v2i16, Legal); 151 152 if (Subtarget.hasMSA()) { 153 addMSAIntType(MVT::v16i8, &Mips::MSA128BRegClass); 154 addMSAIntType(MVT::v8i16, &Mips::MSA128HRegClass); 155 addMSAIntType(MVT::v4i32, &Mips::MSA128WRegClass); 156 addMSAIntType(MVT::v2i64, &Mips::MSA128DRegClass); 157 addMSAFloatType(MVT::v8f16, &Mips::MSA128HRegClass); 158 addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass); 159 addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass); 160 161 // f16 is a storage-only type, always promote it to f32. 
162 addRegisterClass(MVT::f16, &Mips::MSA128HRegClass); 163 setOperationAction(ISD::SETCC, MVT::f16, Promote); 164 setOperationAction(ISD::BR_CC, MVT::f16, Promote); 165 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote); 166 setOperationAction(ISD::SELECT, MVT::f16, Promote); 167 setOperationAction(ISD::FADD, MVT::f16, Promote); 168 setOperationAction(ISD::FSUB, MVT::f16, Promote); 169 setOperationAction(ISD::FMUL, MVT::f16, Promote); 170 setOperationAction(ISD::FDIV, MVT::f16, Promote); 171 setOperationAction(ISD::FREM, MVT::f16, Promote); 172 setOperationAction(ISD::FMA, MVT::f16, Promote); 173 setOperationAction(ISD::FNEG, MVT::f16, Promote); 174 setOperationAction(ISD::FABS, MVT::f16, Promote); 175 setOperationAction(ISD::FCEIL, MVT::f16, Promote); 176 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote); 177 setOperationAction(ISD::FCOS, MVT::f16, Promote); 178 setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote); 179 setOperationAction(ISD::FFLOOR, MVT::f16, Promote); 180 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote); 181 setOperationAction(ISD::FPOW, MVT::f16, Promote); 182 setOperationAction(ISD::FPOWI, MVT::f16, Promote); 183 setOperationAction(ISD::FRINT, MVT::f16, Promote); 184 setOperationAction(ISD::FSIN, MVT::f16, Promote); 185 setOperationAction(ISD::FSINCOS, MVT::f16, Promote); 186 setOperationAction(ISD::FSQRT, MVT::f16, Promote); 187 setOperationAction(ISD::FEXP, MVT::f16, Promote); 188 setOperationAction(ISD::FEXP2, MVT::f16, Promote); 189 setOperationAction(ISD::FLOG, MVT::f16, Promote); 190 setOperationAction(ISD::FLOG2, MVT::f16, Promote); 191 setOperationAction(ISD::FLOG10, MVT::f16, Promote); 192 setOperationAction(ISD::FROUND, MVT::f16, Promote); 193 setOperationAction(ISD::FTRUNC, MVT::f16, Promote); 194 setOperationAction(ISD::FMINNUM, MVT::f16, Promote); 195 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote); 196 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote); 197 setOperationAction(ISD::FMAXIMUM, MVT::f16, 
Promote); 198 199 setTargetDAGCombine({ISD::AND, ISD::OR, ISD::SRA, ISD::VSELECT, ISD::XOR}); 200 } 201 202 if (!Subtarget.useSoftFloat()) { 203 addRegisterClass(MVT::f32, &Mips::FGR32RegClass); 204 205 // When dealing with single precision only, use libcalls 206 if (!Subtarget.isSingleFloat()) { 207 if (Subtarget.isFP64bit()) 208 addRegisterClass(MVT::f64, &Mips::FGR64RegClass); 209 else 210 addRegisterClass(MVT::f64, &Mips::AFGR64RegClass); 211 } 212 } 213 214 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Custom); 215 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Custom); 216 setOperationAction(ISD::MULHS, MVT::i32, Custom); 217 setOperationAction(ISD::MULHU, MVT::i32, Custom); 218 219 if (Subtarget.hasCnMips()) 220 setOperationAction(ISD::MUL, MVT::i64, Legal); 221 else if (Subtarget.isGP64bit()) 222 setOperationAction(ISD::MUL, MVT::i64, Custom); 223 224 if (Subtarget.isGP64bit()) { 225 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Custom); 226 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Custom); 227 setOperationAction(ISD::MULHS, MVT::i64, Custom); 228 setOperationAction(ISD::MULHU, MVT::i64, Custom); 229 setOperationAction(ISD::SDIVREM, MVT::i64, Custom); 230 setOperationAction(ISD::UDIVREM, MVT::i64, Custom); 231 } 232 233 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); 234 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); 235 236 setOperationAction(ISD::SDIVREM, MVT::i32, Custom); 237 setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 238 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 239 if (Subtarget.hasMips32r6()) { 240 setOperationAction(ISD::LOAD, MVT::i32, Legal); 241 setOperationAction(ISD::STORE, MVT::i32, Legal); 242 } else { 243 setOperationAction(ISD::LOAD, MVT::i32, Custom); 244 setOperationAction(ISD::STORE, MVT::i32, Custom); 245 } 246 247 setTargetDAGCombine(ISD::MUL); 248 249 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 250 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, 
Custom); 251 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 252 253 if (Subtarget.hasMips32r2() && !Subtarget.useSoftFloat() && 254 !Subtarget.hasMips64()) { 255 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 256 } 257 258 if (NoDPLoadStore) { 259 setOperationAction(ISD::LOAD, MVT::f64, Custom); 260 setOperationAction(ISD::STORE, MVT::f64, Custom); 261 } 262 263 if (Subtarget.hasMips32r6()) { 264 // MIPS32r6 replaces the accumulator-based multiplies with a three register 265 // instruction 266 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 267 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 268 setOperationAction(ISD::MUL, MVT::i32, Legal); 269 setOperationAction(ISD::MULHS, MVT::i32, Legal); 270 setOperationAction(ISD::MULHU, MVT::i32, Legal); 271 272 // MIPS32r6 replaces the accumulator-based division/remainder with separate 273 // three register division and remainder instructions. 274 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 275 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 276 setOperationAction(ISD::SDIV, MVT::i32, Legal); 277 setOperationAction(ISD::UDIV, MVT::i32, Legal); 278 setOperationAction(ISD::SREM, MVT::i32, Legal); 279 setOperationAction(ISD::UREM, MVT::i32, Legal); 280 281 // MIPS32r6 replaces conditional moves with an equivalent that removes the 282 // need for three GPR read ports. 
283 setOperationAction(ISD::SETCC, MVT::i32, Legal); 284 setOperationAction(ISD::SELECT, MVT::i32, Legal); 285 setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 286 287 setOperationAction(ISD::SETCC, MVT::f32, Legal); 288 setOperationAction(ISD::SELECT, MVT::f32, Legal); 289 setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 290 291 assert(Subtarget.isFP64bit() && "FR=1 is required for MIPS32r6"); 292 setOperationAction(ISD::SETCC, MVT::f64, Legal); 293 setOperationAction(ISD::SELECT, MVT::f64, Custom); 294 setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 295 296 setOperationAction(ISD::BRCOND, MVT::Other, Legal); 297 298 // Floating point > and >= are supported via < and <= 299 setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); 300 setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); 301 setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); 302 setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 303 304 setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); 305 setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); 306 setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); 307 setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 308 } 309 310 if (Subtarget.hasMips64r6()) { 311 // MIPS64r6 replaces the accumulator-based multiplies with a three register 312 // instruction 313 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 314 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 315 setOperationAction(ISD::MUL, MVT::i64, Legal); 316 setOperationAction(ISD::MULHS, MVT::i64, Legal); 317 setOperationAction(ISD::MULHU, MVT::i64, Legal); 318 319 // MIPS32r6 replaces the accumulator-based division/remainder with separate 320 // three register division and remainder instructions. 
321 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 322 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 323 setOperationAction(ISD::SDIV, MVT::i64, Legal); 324 setOperationAction(ISD::UDIV, MVT::i64, Legal); 325 setOperationAction(ISD::SREM, MVT::i64, Legal); 326 setOperationAction(ISD::UREM, MVT::i64, Legal); 327 328 // MIPS64r6 replaces conditional moves with an equivalent that removes the 329 // need for three GPR read ports. 330 setOperationAction(ISD::SETCC, MVT::i64, Legal); 331 setOperationAction(ISD::SELECT, MVT::i64, Legal); 332 setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 333 } 334 335 computeRegisterProperties(Subtarget.getRegisterInfo()); 336 } 337 338 const MipsTargetLowering * 339 llvm::createMipsSETargetLowering(const MipsTargetMachine &TM, 340 const MipsSubtarget &STI) { 341 return new MipsSETargetLowering(TM, STI); 342 } 343 344 const TargetRegisterClass * 345 MipsSETargetLowering::getRepRegClassFor(MVT VT) const { 346 if (VT == MVT::Untyped) 347 return Subtarget.hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; 348 349 return TargetLowering::getRepRegClassFor(VT); 350 } 351 352 // Enable MSA support for the given integer type and Register class. 353 void MipsSETargetLowering:: 354 addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 355 addRegisterClass(Ty, RC); 356 357 // Expand all builtin opcodes. 
358 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 359 setOperationAction(Opc, Ty, Expand); 360 361 setOperationAction(ISD::BITCAST, Ty, Legal); 362 setOperationAction(ISD::LOAD, Ty, Legal); 363 setOperationAction(ISD::STORE, Ty, Legal); 364 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Custom); 365 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 366 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 367 setOperationAction(ISD::UNDEF, Ty, Legal); 368 369 setOperationAction(ISD::ADD, Ty, Legal); 370 setOperationAction(ISD::AND, Ty, Legal); 371 setOperationAction(ISD::CTLZ, Ty, Legal); 372 setOperationAction(ISD::CTPOP, Ty, Legal); 373 setOperationAction(ISD::MUL, Ty, Legal); 374 setOperationAction(ISD::OR, Ty, Legal); 375 setOperationAction(ISD::SDIV, Ty, Legal); 376 setOperationAction(ISD::SREM, Ty, Legal); 377 setOperationAction(ISD::SHL, Ty, Legal); 378 setOperationAction(ISD::SRA, Ty, Legal); 379 setOperationAction(ISD::SRL, Ty, Legal); 380 setOperationAction(ISD::SUB, Ty, Legal); 381 setOperationAction(ISD::SMAX, Ty, Legal); 382 setOperationAction(ISD::SMIN, Ty, Legal); 383 setOperationAction(ISD::UDIV, Ty, Legal); 384 setOperationAction(ISD::UREM, Ty, Legal); 385 setOperationAction(ISD::UMAX, Ty, Legal); 386 setOperationAction(ISD::UMIN, Ty, Legal); 387 setOperationAction(ISD::VECTOR_SHUFFLE, Ty, Custom); 388 setOperationAction(ISD::VSELECT, Ty, Legal); 389 setOperationAction(ISD::XOR, Ty, Legal); 390 391 if (Ty == MVT::v4i32 || Ty == MVT::v2i64) { 392 setOperationAction(ISD::FP_TO_SINT, Ty, Legal); 393 setOperationAction(ISD::FP_TO_UINT, Ty, Legal); 394 setOperationAction(ISD::SINT_TO_FP, Ty, Legal); 395 setOperationAction(ISD::UINT_TO_FP, Ty, Legal); 396 } 397 398 setOperationAction(ISD::SETCC, Ty, Legal); 399 setCondCodeAction(ISD::SETNE, Ty, Expand); 400 setCondCodeAction(ISD::SETGE, Ty, Expand); 401 setCondCodeAction(ISD::SETGT, Ty, Expand); 402 setCondCodeAction(ISD::SETUGE, Ty, Expand); 403 setCondCodeAction(ISD::SETUGT, Ty, Expand); 
404 } 405 406 // Enable MSA support for the given floating-point type and Register class. 407 void MipsSETargetLowering:: 408 addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { 409 addRegisterClass(Ty, RC); 410 411 // Expand all builtin opcodes. 412 for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc) 413 setOperationAction(Opc, Ty, Expand); 414 415 setOperationAction(ISD::LOAD, Ty, Legal); 416 setOperationAction(ISD::STORE, Ty, Legal); 417 setOperationAction(ISD::BITCAST, Ty, Legal); 418 setOperationAction(ISD::EXTRACT_VECTOR_ELT, Ty, Legal); 419 setOperationAction(ISD::INSERT_VECTOR_ELT, Ty, Legal); 420 setOperationAction(ISD::BUILD_VECTOR, Ty, Custom); 421 422 if (Ty != MVT::v8f16) { 423 setOperationAction(ISD::FABS, Ty, Legal); 424 setOperationAction(ISD::FADD, Ty, Legal); 425 setOperationAction(ISD::FDIV, Ty, Legal); 426 setOperationAction(ISD::FEXP2, Ty, Legal); 427 setOperationAction(ISD::FLOG2, Ty, Legal); 428 setOperationAction(ISD::FMA, Ty, Legal); 429 setOperationAction(ISD::FMUL, Ty, Legal); 430 setOperationAction(ISD::FRINT, Ty, Legal); 431 setOperationAction(ISD::FSQRT, Ty, Legal); 432 setOperationAction(ISD::FSUB, Ty, Legal); 433 setOperationAction(ISD::VSELECT, Ty, Legal); 434 435 setOperationAction(ISD::SETCC, Ty, Legal); 436 setCondCodeAction(ISD::SETOGE, Ty, Expand); 437 setCondCodeAction(ISD::SETOGT, Ty, Expand); 438 setCondCodeAction(ISD::SETUGE, Ty, Expand); 439 setCondCodeAction(ISD::SETUGT, Ty, Expand); 440 setCondCodeAction(ISD::SETGE, Ty, Expand); 441 setCondCodeAction(ISD::SETGT, Ty, Expand); 442 } 443 } 444 445 SDValue MipsSETargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { 446 if(!Subtarget.hasMips32r6()) 447 return MipsTargetLowering::LowerOperation(Op, DAG); 448 449 EVT ResTy = Op->getValueType(0); 450 SDLoc DL(Op); 451 452 // Although MTC1_D64 takes an i32 and writes an f64, the upper 32 bits of the 453 // floating point register are undefined. 
Not really an issue as sel.d, which 454 // is produced from an FSELECT node, only looks at bit 0. 455 SDValue Tmp = DAG.getNode(MipsISD::MTC1_D64, DL, MVT::f64, Op->getOperand(0)); 456 return DAG.getNode(MipsISD::FSELECT, DL, ResTy, Tmp, Op->getOperand(1), 457 Op->getOperand(2)); 458 } 459 460 bool MipsSETargetLowering::allowsMisalignedMemoryAccesses( 461 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { 462 MVT::SimpleValueType SVT = VT.getSimpleVT().SimpleTy; 463 464 if (Subtarget.systemSupportsUnalignedAccess()) { 465 // MIPS32r6/MIPS64r6 is required to support unaligned access. It's 466 // implementation defined whether this is handled by hardware, software, or 467 // a hybrid of the two but it's expected that most implementations will 468 // handle the majority of cases in hardware. 469 if (Fast) 470 *Fast = 1; 471 return true; 472 } else if (Subtarget.hasMips32r6()) { 473 return false; 474 } 475 476 switch (SVT) { 477 case MVT::i64: 478 case MVT::i32: 479 if (Fast) 480 *Fast = 1; 481 return true; 482 default: 483 return false; 484 } 485 } 486 487 SDValue MipsSETargetLowering::LowerOperation(SDValue Op, 488 SelectionDAG &DAG) const { 489 switch(Op.getOpcode()) { 490 case ISD::LOAD: return lowerLOAD(Op, DAG); 491 case ISD::STORE: return lowerSTORE(Op, DAG); 492 case ISD::SMUL_LOHI: return lowerMulDiv(Op, MipsISD::Mult, true, true, DAG); 493 case ISD::UMUL_LOHI: return lowerMulDiv(Op, MipsISD::Multu, true, true, DAG); 494 case ISD::MULHS: return lowerMulDiv(Op, MipsISD::Mult, false, true, DAG); 495 case ISD::MULHU: return lowerMulDiv(Op, MipsISD::Multu, false, true, DAG); 496 case ISD::MUL: return lowerMulDiv(Op, MipsISD::Mult, true, false, DAG); 497 case ISD::SDIVREM: return lowerMulDiv(Op, MipsISD::DivRem, true, true, DAG); 498 case ISD::UDIVREM: return lowerMulDiv(Op, MipsISD::DivRemU, true, true, 499 DAG); 500 case ISD::INTRINSIC_WO_CHAIN: return lowerINTRINSIC_WO_CHAIN(Op, DAG); 501 case ISD::INTRINSIC_W_CHAIN: return 
lowerINTRINSIC_W_CHAIN(Op, DAG); 502 case ISD::INTRINSIC_VOID: return lowerINTRINSIC_VOID(Op, DAG); 503 case ISD::EXTRACT_VECTOR_ELT: return lowerEXTRACT_VECTOR_ELT(Op, DAG); 504 case ISD::BUILD_VECTOR: return lowerBUILD_VECTOR(Op, DAG); 505 case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, DAG); 506 case ISD::SELECT: return lowerSELECT(Op, DAG); 507 case ISD::BITCAST: return lowerBITCAST(Op, DAG); 508 } 509 510 return MipsTargetLowering::LowerOperation(Op, DAG); 511 } 512 513 // Fold zero extensions into MipsISD::VEXTRACT_[SZ]EXT_ELT 514 // 515 // Performs the following transformations: 516 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to zero extension if its 517 // sign/zero-extension is completely overwritten by the new one performed by 518 // the ISD::AND. 519 // - Removes redundant zero extensions performed by an ISD::AND. 520 static SDValue performANDCombine(SDNode *N, SelectionDAG &DAG, 521 TargetLowering::DAGCombinerInfo &DCI, 522 const MipsSubtarget &Subtarget) { 523 if (!Subtarget.hasMSA()) 524 return SDValue(); 525 526 SDValue Op0 = N->getOperand(0); 527 SDValue Op1 = N->getOperand(1); 528 unsigned Op0Opcode = Op0->getOpcode(); 529 530 // (and (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d) 531 // where $d + 1 == 2^n and n == 32 532 // or $d + 1 == 2^n and n <= 32 and ZExt 533 // -> (MipsVExtractZExt $a, $b, $c) 534 if (Op0Opcode == MipsISD::VEXTRACT_SEXT_ELT || 535 Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT) { 536 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(Op1); 537 538 if (!Mask) 539 return SDValue(); 540 541 int32_t Log2IfPositive = (Mask->getAPIntValue() + 1).exactLogBase2(); 542 543 if (Log2IfPositive <= 0) 544 return SDValue(); // Mask+1 is not a power of 2 545 546 SDValue Op0Op2 = Op0->getOperand(2); 547 EVT ExtendTy = cast<VTSDNode>(Op0Op2)->getVT(); 548 unsigned ExtendTySize = ExtendTy.getSizeInBits(); 549 unsigned Log2 = Log2IfPositive; 550 551 if ((Op0Opcode == MipsISD::VEXTRACT_ZEXT_ELT && Log2 >= ExtendTySize) || 552 Log2 == ExtendTySize) { 
553 SDValue Ops[] = { Op0->getOperand(0), Op0->getOperand(1), Op0Op2 }; 554 return DAG.getNode(MipsISD::VEXTRACT_ZEXT_ELT, SDLoc(Op0), 555 Op0->getVTList(), 556 ArrayRef(Ops, Op0->getNumOperands())); 557 } 558 } 559 560 return SDValue(); 561 } 562 563 // Determine if the specified node is a constant vector splat. 564 // 565 // Returns true and sets Imm if: 566 // * N is a ISD::BUILD_VECTOR representing a constant splat 567 // 568 // This function is quite similar to MipsSEDAGToDAGISel::selectVSplat. The 569 // differences are that it assumes the MSA has already been checked and the 570 // arbitrary requirement for a maximum of 32-bit integers isn't applied (and 571 // must not be in order for binsri.d to be selectable). 572 static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) { 573 BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N.getNode()); 574 575 if (!Node) 576 return false; 577 578 APInt SplatValue, SplatUndef; 579 unsigned SplatBitSize; 580 bool HasAnyUndefs; 581 582 if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 583 8, !IsLittleEndian)) 584 return false; 585 586 Imm = SplatValue; 587 588 return true; 589 } 590 591 // Test whether the given node is an all-ones build_vector. 592 static bool isVectorAllOnes(SDValue N) { 593 // Look through bitcasts. Endianness doesn't matter because we are looking 594 // for an all-ones value. 595 if (N->getOpcode() == ISD::BITCAST) 596 N = N->getOperand(0); 597 598 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N); 599 600 if (!BVN) 601 return false; 602 603 APInt SplatValue, SplatUndef; 604 unsigned SplatBitSize; 605 bool HasAnyUndefs; 606 607 // Endianness doesn't matter in this context because we are looking for 608 // an all-ones value. 609 if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs)) 610 return SplatValue.isAllOnes(); 611 612 return false; 613 } 614 615 // Test whether N is the bitwise inverse of OfNode. 
616 static bool isBitwiseInverse(SDValue N, SDValue OfNode) { 617 if (N->getOpcode() != ISD::XOR) 618 return false; 619 620 if (isVectorAllOnes(N->getOperand(0))) 621 return N->getOperand(1) == OfNode; 622 623 if (isVectorAllOnes(N->getOperand(1))) 624 return N->getOperand(0) == OfNode; 625 626 return false; 627 } 628 629 // Perform combines where ISD::OR is the root node. 630 // 631 // Performs the following transformations: 632 // - (or (and $a, $mask), (and $b, $inv_mask)) => (vselect $mask, $a, $b) 633 // where $inv_mask is the bitwise inverse of $mask and the 'or' has a 128-bit 634 // vector type. 635 static SDValue performORCombine(SDNode *N, SelectionDAG &DAG, 636 TargetLowering::DAGCombinerInfo &DCI, 637 const MipsSubtarget &Subtarget) { 638 if (!Subtarget.hasMSA()) 639 return SDValue(); 640 641 EVT Ty = N->getValueType(0); 642 643 if (!Ty.is128BitVector()) 644 return SDValue(); 645 646 SDValue Op0 = N->getOperand(0); 647 SDValue Op1 = N->getOperand(1); 648 649 if (Op0->getOpcode() == ISD::AND && Op1->getOpcode() == ISD::AND) { 650 SDValue Op0Op0 = Op0->getOperand(0); 651 SDValue Op0Op1 = Op0->getOperand(1); 652 SDValue Op1Op0 = Op1->getOperand(0); 653 SDValue Op1Op1 = Op1->getOperand(1); 654 bool IsLittleEndian = !Subtarget.isLittle(); 655 656 SDValue IfSet, IfClr, Cond; 657 bool IsConstantMask = false; 658 APInt Mask, InvMask; 659 660 // If Op0Op0 is an appropriate mask, try to find it's inverse in either 661 // Op1Op0, or Op1Op1. Keep track of the Cond, IfSet, and IfClr nodes, while 662 // looking. 663 // IfClr will be set if we find a valid match. 
664 if (isVSplat(Op0Op0, Mask, IsLittleEndian)) { 665 Cond = Op0Op0; 666 IfSet = Op0Op1; 667 668 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 669 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 670 IfClr = Op1Op1; 671 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 672 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 673 IfClr = Op1Op0; 674 675 IsConstantMask = true; 676 } 677 678 // If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same 679 // thing again using this mask. 680 // IfClr will be set if we find a valid match. 681 if (!IfClr.getNode() && isVSplat(Op0Op1, Mask, IsLittleEndian)) { 682 Cond = Op0Op1; 683 IfSet = Op0Op0; 684 685 if (isVSplat(Op1Op0, InvMask, IsLittleEndian) && 686 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 687 IfClr = Op1Op1; 688 else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && 689 Mask.getBitWidth() == InvMask.getBitWidth() && Mask == ~InvMask) 690 IfClr = Op1Op0; 691 692 IsConstantMask = true; 693 } 694 695 // If IfClr is not yet set, try looking for a non-constant match. 696 // IfClr will be set if we find a valid match amongst the eight 697 // possibilities. 
698 if (!IfClr.getNode()) { 699 if (isBitwiseInverse(Op0Op0, Op1Op0)) { 700 Cond = Op1Op0; 701 IfSet = Op1Op1; 702 IfClr = Op0Op1; 703 } else if (isBitwiseInverse(Op0Op1, Op1Op0)) { 704 Cond = Op1Op0; 705 IfSet = Op1Op1; 706 IfClr = Op0Op0; 707 } else if (isBitwiseInverse(Op0Op0, Op1Op1)) { 708 Cond = Op1Op1; 709 IfSet = Op1Op0; 710 IfClr = Op0Op1; 711 } else if (isBitwiseInverse(Op0Op1, Op1Op1)) { 712 Cond = Op1Op1; 713 IfSet = Op1Op0; 714 IfClr = Op0Op0; 715 } else if (isBitwiseInverse(Op1Op0, Op0Op0)) { 716 Cond = Op0Op0; 717 IfSet = Op0Op1; 718 IfClr = Op1Op1; 719 } else if (isBitwiseInverse(Op1Op1, Op0Op0)) { 720 Cond = Op0Op0; 721 IfSet = Op0Op1; 722 IfClr = Op1Op0; 723 } else if (isBitwiseInverse(Op1Op0, Op0Op1)) { 724 Cond = Op0Op1; 725 IfSet = Op0Op0; 726 IfClr = Op1Op1; 727 } else if (isBitwiseInverse(Op1Op1, Op0Op1)) { 728 Cond = Op0Op1; 729 IfSet = Op0Op0; 730 IfClr = Op1Op0; 731 } 732 } 733 734 // At this point, IfClr will be set if we have a valid match. 735 if (!IfClr.getNode()) 736 return SDValue(); 737 738 assert(Cond.getNode() && IfSet.getNode()); 739 740 // Fold degenerate cases. 741 if (IsConstantMask) { 742 if (Mask.isAllOnes()) 743 return IfSet; 744 else if (Mask == 0) 745 return IfClr; 746 } 747 748 // Transform the DAG into an equivalent VSELECT. 749 return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfSet, IfClr); 750 } 751 752 return SDValue(); 753 } 754 755 static bool shouldTransformMulToShiftsAddsSubs(APInt C, EVT VT, 756 SelectionDAG &DAG, 757 const MipsSubtarget &Subtarget) { 758 // Estimate the number of operations the below transform will turn a 759 // constant multiply into. The number is approximately equal to the minimal 760 // number of powers of two that constant can be broken down to by adding 761 // or subtracting them. 762 // 763 // If we have taken more than 12[1] / 8[2] steps to attempt the 764 // optimization for a native sized value, it is more than likely that this 765 // optimization will make things worse. 
766 // 767 // [1] MIPS64 requires 6 instructions at most to materialize any constant, 768 // multiplication requires at least 4 cycles, but another cycle (or two) 769 // to retrieve the result from the HI/LO registers. 770 // 771 // [2] For MIPS32, more than 8 steps is expensive as the constant could be 772 // materialized in 2 instructions, multiplication requires at least 4 773 // cycles, but another cycle (or two) to retrieve the result from the 774 // HI/LO registers. 775 // 776 // TODO: 777 // - MaxSteps needs to consider the `VT` of the constant for the current 778 // target. 779 // - Consider to perform this optimization after type legalization. 780 // That allows to remove a workaround for types not supported natively. 781 // - Take in account `-Os, -Oz` flags because this optimization 782 // increases code size. 783 unsigned MaxSteps = Subtarget.isABI_O32() ? 8 : 12; 784 785 SmallVector<APInt, 16> WorkStack(1, C); 786 unsigned Steps = 0; 787 unsigned BitWidth = C.getBitWidth(); 788 789 while (!WorkStack.empty()) { 790 APInt Val = WorkStack.pop_back_val(); 791 792 if (Val == 0 || Val == 1) 793 continue; 794 795 if (Steps >= MaxSteps) 796 return false; 797 798 if (Val.isPowerOf2()) { 799 ++Steps; 800 continue; 801 } 802 803 APInt Floor = APInt(BitWidth, 1) << Val.logBase2(); 804 APInt Ceil = Val.isNegative() ? APInt(BitWidth, 0) 805 : APInt(BitWidth, 1) << C.ceilLogBase2(); 806 if ((Val - Floor).ule(Ceil - Val)) { 807 WorkStack.push_back(Floor); 808 WorkStack.push_back(Val - Floor); 809 } else { 810 WorkStack.push_back(Ceil); 811 WorkStack.push_back(Ceil - Val); 812 } 813 814 ++Steps; 815 } 816 817 // If the value being multiplied is not supported natively, we have to pay 818 // an additional legalization cost, conservatively assume an increase in the 819 // cost of 3 instructions per step. This values for this heuristic were 820 // determined experimentally. 
821 unsigned RegisterSize = DAG.getTargetLoweringInfo() 822 .getRegisterType(*DAG.getContext(), VT) 823 .getSizeInBits(); 824 Steps *= (VT.getSizeInBits() != RegisterSize) * 3; 825 if (Steps > 27) 826 return false; 827 828 return true; 829 } 830 831 static SDValue genConstMult(SDValue X, APInt C, const SDLoc &DL, EVT VT, 832 EVT ShiftTy, SelectionDAG &DAG) { 833 // Return 0. 834 if (C == 0) 835 return DAG.getConstant(0, DL, VT); 836 837 // Return x. 838 if (C == 1) 839 return X; 840 841 // If c is power of 2, return (shl x, log2(c)). 842 if (C.isPowerOf2()) 843 return DAG.getNode(ISD::SHL, DL, VT, X, 844 DAG.getConstant(C.logBase2(), DL, ShiftTy)); 845 846 unsigned BitWidth = C.getBitWidth(); 847 APInt Floor = APInt(BitWidth, 1) << C.logBase2(); 848 APInt Ceil = C.isNegative() ? APInt(BitWidth, 0) : 849 APInt(BitWidth, 1) << C.ceilLogBase2(); 850 851 // If |c - floor_c| <= |c - ceil_c|, 852 // where floor_c = pow(2, floor(log2(c))) and ceil_c = pow(2, ceil(log2(c))), 853 // return (add constMult(x, floor_c), constMult(x, c - floor_c)). 854 if ((C - Floor).ule(Ceil - C)) { 855 SDValue Op0 = genConstMult(X, Floor, DL, VT, ShiftTy, DAG); 856 SDValue Op1 = genConstMult(X, C - Floor, DL, VT, ShiftTy, DAG); 857 return DAG.getNode(ISD::ADD, DL, VT, Op0, Op1); 858 } 859 860 // If |c - floor_c| > |c - ceil_c|, 861 // return (sub constMult(x, ceil_c), constMult(x, ceil_c - c)). 
862 SDValue Op0 = genConstMult(X, Ceil, DL, VT, ShiftTy, DAG); 863 SDValue Op1 = genConstMult(X, Ceil - C, DL, VT, ShiftTy, DAG); 864 return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1); 865 } 866 867 static SDValue performMULCombine(SDNode *N, SelectionDAG &DAG, 868 const TargetLowering::DAGCombinerInfo &DCI, 869 const MipsSETargetLowering *TL, 870 const MipsSubtarget &Subtarget) { 871 EVT VT = N->getValueType(0); 872 873 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) 874 if (!VT.isVector() && shouldTransformMulToShiftsAddsSubs( 875 C->getAPIntValue(), VT, DAG, Subtarget)) 876 return genConstMult(N->getOperand(0), C->getAPIntValue(), SDLoc(N), VT, 877 TL->getScalarShiftAmountTy(DAG.getDataLayout(), VT), 878 DAG); 879 880 return SDValue(N, 0); 881 } 882 883 static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, 884 SelectionDAG &DAG, 885 const MipsSubtarget &Subtarget) { 886 // See if this is a vector splat immediate node. 887 APInt SplatValue, SplatUndef; 888 unsigned SplatBitSize; 889 bool HasAnyUndefs; 890 unsigned EltSize = Ty.getScalarSizeInBits(); 891 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 892 893 if (!Subtarget.hasDSP()) 894 return SDValue(); 895 896 if (!BV || 897 !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 898 EltSize, !Subtarget.isLittle()) || 899 (SplatBitSize != EltSize) || 900 (SplatValue.getZExtValue() >= EltSize)) 901 return SDValue(); 902 903 SDLoc DL(N); 904 return DAG.getNode(Opc, DL, Ty, N->getOperand(0), 905 DAG.getConstant(SplatValue.getZExtValue(), DL, MVT::i32)); 906 } 907 908 static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, 909 TargetLowering::DAGCombinerInfo &DCI, 910 const MipsSubtarget &Subtarget) { 911 EVT Ty = N->getValueType(0); 912 913 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 914 return SDValue(); 915 916 return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); 917 } 918 919 // Fold sign-extensions into 
MipsISD::VEXTRACT_[SZ]EXT_ELT for MSA and fold 920 // constant splats into MipsISD::SHRA_DSP for DSPr2. 921 // 922 // Performs the following transformations: 923 // - Changes MipsISD::VEXTRACT_[SZ]EXT_ELT to sign extension if its 924 // sign/zero-extension is completely overwritten by the new one performed by 925 // the ISD::SRA and ISD::SHL nodes. 926 // - Removes redundant sign extensions performed by an ISD::SRA and ISD::SHL 927 // sequence. 928 // 929 // See performDSPShiftCombine for more information about the transformation 930 // used for DSPr2. 931 static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, 932 TargetLowering::DAGCombinerInfo &DCI, 933 const MipsSubtarget &Subtarget) { 934 EVT Ty = N->getValueType(0); 935 936 if (Subtarget.hasMSA()) { 937 SDValue Op0 = N->getOperand(0); 938 SDValue Op1 = N->getOperand(1); 939 940 // (sra (shl (MipsVExtract[SZ]Ext $a, $b, $c), imm:$d), imm:$d) 941 // where $d + sizeof($c) == 32 942 // or $d + sizeof($c) <= 32 and SExt 943 // -> (MipsVExtractSExt $a, $b, $c) 944 if (Op0->getOpcode() == ISD::SHL && Op1 == Op0->getOperand(1)) { 945 SDValue Op0Op0 = Op0->getOperand(0); 946 ConstantSDNode *ShAmount = dyn_cast<ConstantSDNode>(Op1); 947 948 if (!ShAmount) 949 return SDValue(); 950 951 if (Op0Op0->getOpcode() != MipsISD::VEXTRACT_SEXT_ELT && 952 Op0Op0->getOpcode() != MipsISD::VEXTRACT_ZEXT_ELT) 953 return SDValue(); 954 955 EVT ExtendTy = cast<VTSDNode>(Op0Op0->getOperand(2))->getVT(); 956 unsigned TotalBits = ShAmount->getZExtValue() + ExtendTy.getSizeInBits(); 957 958 if (TotalBits == 32 || 959 (Op0Op0->getOpcode() == MipsISD::VEXTRACT_SEXT_ELT && 960 TotalBits <= 32)) { 961 SDValue Ops[] = { Op0Op0->getOperand(0), Op0Op0->getOperand(1), 962 Op0Op0->getOperand(2) }; 963 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, SDLoc(Op0Op0), 964 Op0Op0->getVTList(), 965 ArrayRef(Ops, Op0Op0->getNumOperands())); 966 } 967 } 968 } 969 970 if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget.hasDSPR2())) 971 return 
SDValue(); 972 973 return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); 974 } 975 976 977 static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, 978 TargetLowering::DAGCombinerInfo &DCI, 979 const MipsSubtarget &Subtarget) { 980 EVT Ty = N->getValueType(0); 981 982 if (((Ty != MVT::v2i16) || !Subtarget.hasDSPR2()) && (Ty != MVT::v4i8)) 983 return SDValue(); 984 985 return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); 986 } 987 988 static bool isLegalDSPCondCode(EVT Ty, ISD::CondCode CC) { 989 bool IsV216 = (Ty == MVT::v2i16); 990 991 switch (CC) { 992 case ISD::SETEQ: 993 case ISD::SETNE: return true; 994 case ISD::SETLT: 995 case ISD::SETLE: 996 case ISD::SETGT: 997 case ISD::SETGE: return IsV216; 998 case ISD::SETULT: 999 case ISD::SETULE: 1000 case ISD::SETUGT: 1001 case ISD::SETUGE: return !IsV216; 1002 default: return false; 1003 } 1004 } 1005 1006 static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) { 1007 EVT Ty = N->getValueType(0); 1008 1009 if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) 1010 return SDValue(); 1011 1012 if (!isLegalDSPCondCode(Ty, cast<CondCodeSDNode>(N->getOperand(2))->get())) 1013 return SDValue(); 1014 1015 return DAG.getNode(MipsISD::SETCC_DSP, SDLoc(N), Ty, N->getOperand(0), 1016 N->getOperand(1), N->getOperand(2)); 1017 } 1018 1019 static SDValue performVSELECTCombine(SDNode *N, SelectionDAG &DAG) { 1020 EVT Ty = N->getValueType(0); 1021 1022 if (Ty == MVT::v2i16 || Ty == MVT::v4i8) { 1023 SDValue SetCC = N->getOperand(0); 1024 1025 if (SetCC.getOpcode() != MipsISD::SETCC_DSP) 1026 return SDValue(); 1027 1028 return DAG.getNode(MipsISD::SELECT_CC_DSP, SDLoc(N), Ty, 1029 SetCC.getOperand(0), SetCC.getOperand(1), 1030 N->getOperand(1), N->getOperand(2), SetCC.getOperand(2)); 1031 } 1032 1033 return SDValue(); 1034 } 1035 1036 static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, 1037 const MipsSubtarget &Subtarget) { 1038 EVT Ty = N->getValueType(0); 1039 1040 if 
(Subtarget.hasMSA() && Ty.is128BitVector() && Ty.isInteger()) { 1041 // Try the following combines: 1042 // (xor (or $a, $b), (build_vector allones)) 1043 // (xor (or $a, $b), (bitcast (build_vector allones))) 1044 SDValue Op0 = N->getOperand(0); 1045 SDValue Op1 = N->getOperand(1); 1046 SDValue NotOp; 1047 1048 if (ISD::isBuildVectorAllOnes(Op0.getNode())) 1049 NotOp = Op1; 1050 else if (ISD::isBuildVectorAllOnes(Op1.getNode())) 1051 NotOp = Op0; 1052 else 1053 return SDValue(); 1054 1055 if (NotOp->getOpcode() == ISD::OR) 1056 return DAG.getNode(MipsISD::VNOR, SDLoc(N), Ty, NotOp->getOperand(0), 1057 NotOp->getOperand(1)); 1058 } 1059 1060 return SDValue(); 1061 } 1062 1063 SDValue 1064 MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1065 SelectionDAG &DAG = DCI.DAG; 1066 SDValue Val; 1067 1068 switch (N->getOpcode()) { 1069 case ISD::AND: 1070 Val = performANDCombine(N, DAG, DCI, Subtarget); 1071 break; 1072 case ISD::OR: 1073 Val = performORCombine(N, DAG, DCI, Subtarget); 1074 break; 1075 case ISD::MUL: 1076 return performMULCombine(N, DAG, DCI, this, Subtarget); 1077 case ISD::SHL: 1078 Val = performSHLCombine(N, DAG, DCI, Subtarget); 1079 break; 1080 case ISD::SRA: 1081 return performSRACombine(N, DAG, DCI, Subtarget); 1082 case ISD::SRL: 1083 return performSRLCombine(N, DAG, DCI, Subtarget); 1084 case ISD::VSELECT: 1085 return performVSELECTCombine(N, DAG); 1086 case ISD::XOR: 1087 Val = performXORCombine(N, DAG, Subtarget); 1088 break; 1089 case ISD::SETCC: 1090 Val = performSETCCCombine(N, DAG); 1091 break; 1092 } 1093 1094 if (Val.getNode()) { 1095 LLVM_DEBUG(dbgs() << "\nMipsSE DAG Combine:\n"; 1096 N->printrWithDepth(dbgs(), &DAG); dbgs() << "\n=> \n"; 1097 Val.getNode()->printrWithDepth(dbgs(), &DAG); dbgs() << "\n"); 1098 return Val; 1099 } 1100 1101 return MipsTargetLowering::PerformDAGCombine(N, DCI); 1102 } 1103 1104 MachineBasicBlock * 1105 MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, 1106 
MachineBasicBlock *BB) const {
  switch (MI.getOpcode()) {
  default:
    // Not an SE-specific pseudo: let the base class handle it.
    return MipsTargetLowering::EmitInstrWithCustomInserter(MI, BB);
  case Mips::BPOSGE32_PSEUDO:
    return emitBPOSGE32(MI, BB);
  // MSA branch pseudos: SNZ_* are expanded with the BNZ_* branch opcodes and
  // SZ_* with the BZ_* ones.
  case Mips::SNZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_B);
  case Mips::SNZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_H);
  case Mips::SNZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_W);
  case Mips::SNZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_D);
  case Mips::SNZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BNZ_V);
  case Mips::SZ_B_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_B);
  case Mips::SZ_H_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_H);
  case Mips::SZ_W_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_W);
  case Mips::SZ_D_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_D);
  case Mips::SZ_V_PSEUDO:
    return emitMSACBranchPseudo(MI, BB, Mips::BZ_V);
  // Floating-point copy and insert pseudos.
  case Mips::COPY_FW_PSEUDO:
    return emitCOPY_FW(MI, BB);
  case Mips::COPY_FD_PSEUDO:
    return emitCOPY_FD(MI, BB);
  case Mips::INSERT_FW_PSEUDO:
    return emitINSERT_FW(MI, BB);
  case Mips::INSERT_FD_PSEUDO:
    return emitINSERT_FD(MI, BB);
  // Variable-index inserts. The *_VIDX and *_VIDX64 variants share one
  // expansion.
  // NOTE(review): the last two arguments look like the element size in bytes
  // and an is-floating-point flag - confirm against emitINSERT_DF_VIDX.
  case Mips::INSERT_B_VIDX_PSEUDO:
  case Mips::INSERT_B_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 1, false);
  case Mips::INSERT_H_VIDX_PSEUDO:
  case Mips::INSERT_H_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 2, false);
  case Mips::INSERT_W_VIDX_PSEUDO:
  case Mips::INSERT_W_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, false);
  case Mips::INSERT_D_VIDX_PSEUDO:
  case Mips::INSERT_D_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, false);
  case Mips::INSERT_FW_VIDX_PSEUDO:
  case Mips::INSERT_FW_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 4, true);
  case Mips::INSERT_FD_VIDX_PSEUDO:
  case Mips::INSERT_FD_VIDX64_PSEUDO:
    return emitINSERT_DF_VIDX(MI, BB, 8, true);
  // Floating-point fill and exp2 pseudos.
  case Mips::FILL_FW_PSEUDO:
    return emitFILL_FW(MI, BB);
  case Mips::FILL_FD_PSEUDO:
    return emitFILL_FD(MI, BB);
  case Mips::FEXP2_W_1_PSEUDO:
    return emitFEXP2_W_1(MI, BB);
  case Mips::FEXP2_D_1_PSEUDO:
    return emitFEXP2_D_1(MI, BB);
  // f16 store/load and MSA FP extend/round pseudos. The W variants pass false
  // and the D variants pass true as the last argument.
  case Mips::ST_F16:
    return emitST_F16_PSEUDO(MI, BB);
  case Mips::LD_F16:
    return emitLD_F16_PSEUDO(MI, BB);
  case Mips::MSA_FP_EXTEND_W_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_ROUND_W_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, false);
  case Mips::MSA_FP_EXTEND_D_PSEUDO:
    return emitFPEXTEND_PSEUDO(MI, BB, true);
  case Mips::MSA_FP_ROUND_D_PSEUDO:
    return emitFPROUND_PSEUDO(MI, BB, true);
  }
}

// Decides whether a call may be emitted as a tail call.
//
// Returns false when the -mips-tail-calls option is off, when the caller is
// an interrupt service routine, or when a byval argument is involved on
// either side. Otherwise the call is eligible exactly when NextStackOffset
// does not exceed the caller's incoming argument size.
bool MipsSETargetLowering::isEligibleForTailCallOptimization(
    const CCState &CCInfo, unsigned NextStackOffset,
    const MipsFunctionInfo &FI) const {
  // Tail calls are opt-in through the command line flag.
  if (!UseMipsTailCalls)
    return false;

  // Exception has to be cleared with eret.
  if (FI.isISR())
    return false;

  // Return false if either the callee or caller has a byval argument.
  if (CCInfo.getInRegsParamsCount() > 0 || FI.hasByvalArg())
    return false;

  // Return true if the callee's argument area is no larger than the
  // caller's.
1197 return NextStackOffset <= FI.getIncomingArgSize(); 1198 } 1199 1200 void MipsSETargetLowering:: 1201 getOpndList(SmallVectorImpl<SDValue> &Ops, 1202 std::deque<std::pair<unsigned, SDValue>> &RegsToPass, 1203 bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage, 1204 bool IsCallReloc, CallLoweringInfo &CLI, SDValue Callee, 1205 SDValue Chain) const { 1206 Ops.push_back(Callee); 1207 MipsTargetLowering::getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, 1208 InternalLinkage, IsCallReloc, CLI, Callee, 1209 Chain); 1210 } 1211 1212 SDValue MipsSETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const { 1213 LoadSDNode &Nd = *cast<LoadSDNode>(Op); 1214 1215 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1216 return MipsTargetLowering::lowerLOAD(Op, DAG); 1217 1218 // Replace a double precision load with two i32 loads and a buildpair64. 1219 SDLoc DL(Op); 1220 SDValue Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1221 EVT PtrVT = Ptr.getValueType(); 1222 1223 // i32 load from lower address. 1224 SDValue Lo = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo(), 1225 Nd.getAlign(), Nd.getMemOperand()->getFlags()); 1226 1227 // i32 load from higher address. 
1228 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1229 SDValue Hi = DAG.getLoad( 1230 MVT::i32, DL, Lo.getValue(1), Ptr, MachinePointerInfo(), 1231 commonAlignment(Nd.getAlign(), 4), Nd.getMemOperand()->getFlags()); 1232 1233 if (!Subtarget.isLittle()) 1234 std::swap(Lo, Hi); 1235 1236 SDValue BP = DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1237 SDValue Ops[2] = {BP, Hi.getValue(1)}; 1238 return DAG.getMergeValues(Ops, DL); 1239 } 1240 1241 SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { 1242 StoreSDNode &Nd = *cast<StoreSDNode>(Op); 1243 1244 if (Nd.getMemoryVT() != MVT::f64 || !NoDPLoadStore) 1245 return MipsTargetLowering::lowerSTORE(Op, DAG); 1246 1247 // Replace a double precision store with two extractelement64s and i32 stores. 1248 SDLoc DL(Op); 1249 SDValue Val = Nd.getValue(), Ptr = Nd.getBasePtr(), Chain = Nd.getChain(); 1250 EVT PtrVT = Ptr.getValueType(); 1251 SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1252 Val, DAG.getConstant(0, DL, MVT::i32)); 1253 SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, 1254 Val, DAG.getConstant(1, DL, MVT::i32)); 1255 1256 if (!Subtarget.isLittle()) 1257 std::swap(Lo, Hi); 1258 1259 // i32 store to lower address. 1260 Chain = DAG.getStore(Chain, DL, Lo, Ptr, MachinePointerInfo(), Nd.getAlign(), 1261 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1262 1263 // i32 store to higher address. 1264 Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, Ptr, DAG.getConstant(4, DL, PtrVT)); 1265 return DAG.getStore(Chain, DL, Hi, Ptr, MachinePointerInfo(), 1266 commonAlignment(Nd.getAlign(), 4), 1267 Nd.getMemOperand()->getFlags(), Nd.getAAInfo()); 1268 } 1269 1270 SDValue MipsSETargetLowering::lowerBITCAST(SDValue Op, 1271 SelectionDAG &DAG) const { 1272 SDLoc DL(Op); 1273 MVT Src = Op.getOperand(0).getValueType().getSimpleVT(); 1274 MVT Dest = Op.getValueType().getSimpleVT(); 1275 1276 // Bitcast i64 to double. 
1277 if (Src == MVT::i64 && Dest == MVT::f64) { 1278 SDValue Lo, Hi; 1279 std::tie(Lo, Hi) = 1280 DAG.SplitScalar(Op.getOperand(0), DL, MVT::i32, MVT::i32); 1281 return DAG.getNode(MipsISD::BuildPairF64, DL, MVT::f64, Lo, Hi); 1282 } 1283 1284 // Bitcast double to i64. 1285 if (Src == MVT::f64 && Dest == MVT::i64) { 1286 // Skip lower bitcast when operand0 has converted float results to integer 1287 // which was done by function SoftenFloatResult. 1288 if (getTypeAction(*DAG.getContext(), Op.getOperand(0).getValueType()) == 1289 TargetLowering::TypeSoftenFloat) 1290 return SDValue(); 1291 SDValue Lo = 1292 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), 1293 DAG.getConstant(0, DL, MVT::i32)); 1294 SDValue Hi = 1295 DAG.getNode(MipsISD::ExtractElementF64, DL, MVT::i32, Op.getOperand(0), 1296 DAG.getConstant(1, DL, MVT::i32)); 1297 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1298 } 1299 1300 // Skip other cases of bitcast and use default lowering. 1301 return SDValue(); 1302 } 1303 1304 SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, 1305 bool HasLo, bool HasHi, 1306 SelectionDAG &DAG) const { 1307 // MIPS32r6/MIPS64r6 removed accumulator based multiplies. 1308 assert(!Subtarget.hasMips32r6()); 1309 1310 EVT Ty = Op.getOperand(0).getValueType(); 1311 SDLoc DL(Op); 1312 SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, 1313 Op.getOperand(0), Op.getOperand(1)); 1314 SDValue Lo, Hi; 1315 1316 if (HasLo) 1317 Lo = DAG.getNode(MipsISD::MFLO, DL, Ty, Mult); 1318 if (HasHi) 1319 Hi = DAG.getNode(MipsISD::MFHI, DL, Ty, Mult); 1320 1321 if (!HasLo || !HasHi) 1322 return HasLo ? 
Lo : Hi; 1323 1324 SDValue Vals[] = { Lo, Hi }; 1325 return DAG.getMergeValues(Vals, DL); 1326 } 1327 1328 static SDValue initAccumulator(SDValue In, const SDLoc &DL, SelectionDAG &DAG) { 1329 SDValue InLo, InHi; 1330 std::tie(InLo, InHi) = DAG.SplitScalar(In, DL, MVT::i32, MVT::i32); 1331 return DAG.getNode(MipsISD::MTLOHI, DL, MVT::Untyped, InLo, InHi); 1332 } 1333 1334 static SDValue extractLOHI(SDValue Op, const SDLoc &DL, SelectionDAG &DAG) { 1335 SDValue Lo = DAG.getNode(MipsISD::MFLO, DL, MVT::i32, Op); 1336 SDValue Hi = DAG.getNode(MipsISD::MFHI, DL, MVT::i32, Op); 1337 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); 1338 } 1339 1340 // This function expands mips intrinsic nodes which have 64-bit input operands 1341 // or output values. 1342 // 1343 // out64 = intrinsic-node in64 1344 // => 1345 // lo = copy (extract-element (in64, 0)) 1346 // hi = copy (extract-element (in64, 1)) 1347 // mips-specific-node 1348 // v0 = copy lo 1349 // v1 = copy hi 1350 // out64 = merge-values (v0, v1) 1351 // 1352 static SDValue lowerDSPIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1353 SDLoc DL(Op); 1354 bool HasChainIn = Op->getOperand(0).getValueType() == MVT::Other; 1355 SmallVector<SDValue, 3> Ops; 1356 unsigned OpNo = 0; 1357 1358 // See if Op has a chain input. 1359 if (HasChainIn) 1360 Ops.push_back(Op->getOperand(OpNo++)); 1361 1362 // The next operand is the intrinsic opcode. 1363 assert(Op->getOperand(OpNo).getOpcode() == ISD::TargetConstant); 1364 1365 // See if the next operand has type i64. 1366 SDValue Opnd = Op->getOperand(++OpNo), In64; 1367 1368 if (Opnd.getValueType() == MVT::i64) 1369 In64 = initAccumulator(Opnd, DL, DAG); 1370 else 1371 Ops.push_back(Opnd); 1372 1373 // Push the remaining operands. 1374 for (++OpNo ; OpNo < Op->getNumOperands(); ++OpNo) 1375 Ops.push_back(Op->getOperand(OpNo)); 1376 1377 // Add In64 to the end of the list. 1378 if (In64.getNode()) 1379 Ops.push_back(In64); 1380 1381 // Scan output. 
1382 SmallVector<EVT, 2> ResTys; 1383 1384 for (EVT Ty : Op->values()) 1385 ResTys.push_back((Ty == MVT::i64) ? MVT::Untyped : Ty); 1386 1387 // Create node. 1388 SDValue Val = DAG.getNode(Opc, DL, ResTys, Ops); 1389 SDValue Out = (ResTys[0] == MVT::Untyped) ? extractLOHI(Val, DL, DAG) : Val; 1390 1391 if (!HasChainIn) 1392 return Out; 1393 1394 assert(Val->getValueType(1) == MVT::Other); 1395 SDValue Vals[] = { Out, SDValue(Val.getNode(), 1) }; 1396 return DAG.getMergeValues(Vals, DL); 1397 } 1398 1399 // Lower an MSA copy intrinsic into the specified SelectionDAG node 1400 static SDValue lowerMSACopyIntr(SDValue Op, SelectionDAG &DAG, unsigned Opc) { 1401 SDLoc DL(Op); 1402 SDValue Vec = Op->getOperand(1); 1403 SDValue Idx = Op->getOperand(2); 1404 EVT ResTy = Op->getValueType(0); 1405 EVT EltTy = Vec->getValueType(0).getVectorElementType(); 1406 1407 SDValue Result = DAG.getNode(Opc, DL, ResTy, Vec, Idx, 1408 DAG.getValueType(EltTy)); 1409 1410 return Result; 1411 } 1412 1413 static SDValue lowerMSASplatZExt(SDValue Op, unsigned OpNr, SelectionDAG &DAG) { 1414 EVT ResVecTy = Op->getValueType(0); 1415 EVT ViaVecTy = ResVecTy; 1416 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1417 SDLoc DL(Op); 1418 1419 // When ResVecTy == MVT::v2i64, LaneA is the upper 32 bits of the lane and 1420 // LaneB is the lower 32-bits. Otherwise LaneA and LaneB are alternating 1421 // lanes. 1422 SDValue LaneA = Op->getOperand(OpNr); 1423 SDValue LaneB; 1424 1425 if (ResVecTy == MVT::v2i64) { 1426 // In case of the index being passed as an immediate value, set the upper 1427 // lane to 0 so that the splati.d instruction can be matched. 1428 if (isa<ConstantSDNode>(LaneA)) 1429 LaneB = DAG.getConstant(0, DL, MVT::i32); 1430 // Having the index passed in a register, set the upper lane to the same 1431 // value as the lower - this results in the BUILD_VECTOR node not being 1432 // expanded through stack. 
This way we are able to pattern match the set of 1433 // nodes created here to splat.d. 1434 else 1435 LaneB = LaneA; 1436 ViaVecTy = MVT::v4i32; 1437 if(BigEndian) 1438 std::swap(LaneA, LaneB); 1439 } else 1440 LaneB = LaneA; 1441 1442 SDValue Ops[16] = { LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, 1443 LaneA, LaneB, LaneA, LaneB, LaneA, LaneB, LaneA, LaneB }; 1444 1445 SDValue Result = DAG.getBuildVector( 1446 ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1447 1448 if (ViaVecTy != ResVecTy) { 1449 SDValue One = DAG.getConstant(1, DL, ViaVecTy); 1450 Result = DAG.getNode(ISD::BITCAST, DL, ResVecTy, 1451 DAG.getNode(ISD::AND, DL, ViaVecTy, Result, One)); 1452 } 1453 1454 return Result; 1455 } 1456 1457 static SDValue lowerMSASplatImm(SDValue Op, unsigned ImmOp, SelectionDAG &DAG, 1458 bool IsSigned = false) { 1459 auto *CImm = cast<ConstantSDNode>(Op->getOperand(ImmOp)); 1460 return DAG.getConstant( 1461 APInt(Op->getValueType(0).getScalarType().getSizeInBits(), 1462 IsSigned ? CImm->getSExtValue() : CImm->getZExtValue(), IsSigned), 1463 SDLoc(Op), Op->getValueType(0)); 1464 } 1465 1466 static SDValue getBuildVectorSplat(EVT VecTy, SDValue SplatValue, 1467 bool BigEndian, SelectionDAG &DAG) { 1468 EVT ViaVecTy = VecTy; 1469 SDValue SplatValueA = SplatValue; 1470 SDValue SplatValueB = SplatValue; 1471 SDLoc DL(SplatValue); 1472 1473 if (VecTy == MVT::v2i64) { 1474 // v2i64 BUILD_VECTOR must be performed via v4i32 so split into i32's. 1475 ViaVecTy = MVT::v4i32; 1476 1477 SplatValueA = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValue); 1478 SplatValueB = DAG.getNode(ISD::SRL, DL, MVT::i64, SplatValue, 1479 DAG.getConstant(32, DL, MVT::i32)); 1480 SplatValueB = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, SplatValueB); 1481 } 1482 1483 // We currently hold the parts in little endian order. Swap them if 1484 // necessary. 
1485 if (BigEndian) 1486 std::swap(SplatValueA, SplatValueB); 1487 1488 SDValue Ops[16] = { SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1489 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1490 SplatValueA, SplatValueB, SplatValueA, SplatValueB, 1491 SplatValueA, SplatValueB, SplatValueA, SplatValueB }; 1492 1493 SDValue Result = DAG.getBuildVector( 1494 ViaVecTy, DL, ArrayRef(Ops, ViaVecTy.getVectorNumElements())); 1495 1496 if (VecTy != ViaVecTy) 1497 Result = DAG.getNode(ISD::BITCAST, DL, VecTy, Result); 1498 1499 return Result; 1500 } 1501 1502 static SDValue lowerMSABinaryBitImmIntr(SDValue Op, SelectionDAG &DAG, 1503 unsigned Opc, SDValue Imm, 1504 bool BigEndian) { 1505 EVT VecTy = Op->getValueType(0); 1506 SDValue Exp2Imm; 1507 SDLoc DL(Op); 1508 1509 // The DAG Combiner can't constant fold bitcasted vectors yet so we must do it 1510 // here for now. 1511 if (VecTy == MVT::v2i64) { 1512 if (ConstantSDNode *CImm = dyn_cast<ConstantSDNode>(Imm)) { 1513 APInt BitImm = APInt(64, 1) << CImm->getAPIntValue(); 1514 1515 SDValue BitImmHiOp = DAG.getConstant(BitImm.lshr(32).trunc(32), DL, 1516 MVT::i32); 1517 SDValue BitImmLoOp = DAG.getConstant(BitImm.trunc(32), DL, MVT::i32); 1518 1519 if (BigEndian) 1520 std::swap(BitImmLoOp, BitImmHiOp); 1521 1522 Exp2Imm = DAG.getNode( 1523 ISD::BITCAST, DL, MVT::v2i64, 1524 DAG.getBuildVector(MVT::v4i32, DL, 1525 {BitImmLoOp, BitImmHiOp, BitImmLoOp, BitImmHiOp})); 1526 } 1527 } 1528 1529 if (!Exp2Imm.getNode()) { 1530 // We couldnt constant fold, do a vector shift instead 1531 1532 // Extend i32 to i64 if necessary. Sign or zero extend doesn't matter since 1533 // only values 0-63 are valid. 
1534 if (VecTy == MVT::v2i64) 1535 Imm = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Imm); 1536 1537 Exp2Imm = getBuildVectorSplat(VecTy, Imm, BigEndian, DAG); 1538 1539 Exp2Imm = DAG.getNode(ISD::SHL, DL, VecTy, DAG.getConstant(1, DL, VecTy), 1540 Exp2Imm); 1541 } 1542 1543 return DAG.getNode(Opc, DL, VecTy, Op->getOperand(1), Exp2Imm); 1544 } 1545 1546 static SDValue truncateVecElts(SDValue Op, SelectionDAG &DAG) { 1547 SDLoc DL(Op); 1548 EVT ResTy = Op->getValueType(0); 1549 SDValue Vec = Op->getOperand(2); 1550 bool BigEndian = !DAG.getSubtarget().getTargetTriple().isLittleEndian(); 1551 MVT ResEltTy = ResTy == MVT::v2i64 ? MVT::i64 : MVT::i32; 1552 SDValue ConstValue = DAG.getConstant(Vec.getScalarValueSizeInBits() - 1, 1553 DL, ResEltTy); 1554 SDValue SplatVec = getBuildVectorSplat(ResTy, ConstValue, BigEndian, DAG); 1555 1556 return DAG.getNode(ISD::AND, DL, ResTy, Vec, SplatVec); 1557 } 1558 1559 static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) { 1560 EVT ResTy = Op->getValueType(0); 1561 SDLoc DL(Op); 1562 SDValue One = DAG.getConstant(1, DL, ResTy); 1563 SDValue Bit = DAG.getNode(ISD::SHL, DL, ResTy, One, truncateVecElts(Op, DAG)); 1564 1565 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), 1566 DAG.getNOT(DL, Bit, ResTy)); 1567 } 1568 1569 static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) { 1570 SDLoc DL(Op); 1571 EVT ResTy = Op->getValueType(0); 1572 APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1) 1573 << Op->getConstantOperandAPInt(2); 1574 SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy); 1575 1576 return DAG.getNode(ISD::AND, DL, ResTy, Op->getOperand(1), BitMask); 1577 } 1578 1579 SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 1580 SelectionDAG &DAG) const { 1581 SDLoc DL(Op); 1582 unsigned Intrinsic = Op->getConstantOperandVal(0); 1583 switch (Intrinsic) { 1584 default: 1585 return SDValue(); 1586 case Intrinsic::mips_shilo: 1587 return lowerDSPIntr(Op, DAG, MipsISD::SHILO); 1588 
case Intrinsic::mips_dpau_h_qbl: 1589 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBL); 1590 case Intrinsic::mips_dpau_h_qbr: 1591 return lowerDSPIntr(Op, DAG, MipsISD::DPAU_H_QBR); 1592 case Intrinsic::mips_dpsu_h_qbl: 1593 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBL); 1594 case Intrinsic::mips_dpsu_h_qbr: 1595 return lowerDSPIntr(Op, DAG, MipsISD::DPSU_H_QBR); 1596 case Intrinsic::mips_dpa_w_ph: 1597 return lowerDSPIntr(Op, DAG, MipsISD::DPA_W_PH); 1598 case Intrinsic::mips_dps_w_ph: 1599 return lowerDSPIntr(Op, DAG, MipsISD::DPS_W_PH); 1600 case Intrinsic::mips_dpax_w_ph: 1601 return lowerDSPIntr(Op, DAG, MipsISD::DPAX_W_PH); 1602 case Intrinsic::mips_dpsx_w_ph: 1603 return lowerDSPIntr(Op, DAG, MipsISD::DPSX_W_PH); 1604 case Intrinsic::mips_mulsa_w_ph: 1605 return lowerDSPIntr(Op, DAG, MipsISD::MULSA_W_PH); 1606 case Intrinsic::mips_mult: 1607 return lowerDSPIntr(Op, DAG, MipsISD::Mult); 1608 case Intrinsic::mips_multu: 1609 return lowerDSPIntr(Op, DAG, MipsISD::Multu); 1610 case Intrinsic::mips_madd: 1611 return lowerDSPIntr(Op, DAG, MipsISD::MAdd); 1612 case Intrinsic::mips_maddu: 1613 return lowerDSPIntr(Op, DAG, MipsISD::MAddu); 1614 case Intrinsic::mips_msub: 1615 return lowerDSPIntr(Op, DAG, MipsISD::MSub); 1616 case Intrinsic::mips_msubu: 1617 return lowerDSPIntr(Op, DAG, MipsISD::MSubu); 1618 case Intrinsic::mips_addv_b: 1619 case Intrinsic::mips_addv_h: 1620 case Intrinsic::mips_addv_w: 1621 case Intrinsic::mips_addv_d: 1622 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1623 Op->getOperand(2)); 1624 case Intrinsic::mips_addvi_b: 1625 case Intrinsic::mips_addvi_h: 1626 case Intrinsic::mips_addvi_w: 1627 case Intrinsic::mips_addvi_d: 1628 return DAG.getNode(ISD::ADD, DL, Op->getValueType(0), Op->getOperand(1), 1629 lowerMSASplatImm(Op, 2, DAG)); 1630 case Intrinsic::mips_and_v: 1631 return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1632 Op->getOperand(2)); 1633 case Intrinsic::mips_andi_b: 1634 
return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), 1635 lowerMSASplatImm(Op, 2, DAG)); 1636 case Intrinsic::mips_bclr_b: 1637 case Intrinsic::mips_bclr_h: 1638 case Intrinsic::mips_bclr_w: 1639 case Intrinsic::mips_bclr_d: 1640 return lowerMSABitClear(Op, DAG); 1641 case Intrinsic::mips_bclri_b: 1642 case Intrinsic::mips_bclri_h: 1643 case Intrinsic::mips_bclri_w: 1644 case Intrinsic::mips_bclri_d: 1645 return lowerMSABitClearImm(Op, DAG); 1646 case Intrinsic::mips_binsli_b: 1647 case Intrinsic::mips_binsli_h: 1648 case Intrinsic::mips_binsli_w: 1649 case Intrinsic::mips_binsli_d: { 1650 // binsli_x(IfClear, IfSet, nbits) -> (vselect LBitsMask, IfSet, IfClear) 1651 EVT VecTy = Op->getValueType(0); 1652 EVT EltTy = VecTy.getVectorElementType(); 1653 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1654 report_fatal_error("Immediate out of range"); 1655 APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), 1656 Op->getConstantOperandVal(3) + 1); 1657 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1658 DAG.getConstant(Mask, DL, VecTy, true), 1659 Op->getOperand(2), Op->getOperand(1)); 1660 } 1661 case Intrinsic::mips_binsri_b: 1662 case Intrinsic::mips_binsri_h: 1663 case Intrinsic::mips_binsri_w: 1664 case Intrinsic::mips_binsri_d: { 1665 // binsri_x(IfClear, IfSet, nbits) -> (vselect RBitsMask, IfSet, IfClear) 1666 EVT VecTy = Op->getValueType(0); 1667 EVT EltTy = VecTy.getVectorElementType(); 1668 if (Op->getConstantOperandVal(3) >= EltTy.getSizeInBits()) 1669 report_fatal_error("Immediate out of range"); 1670 APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), 1671 Op->getConstantOperandVal(3) + 1); 1672 return DAG.getNode(ISD::VSELECT, DL, VecTy, 1673 DAG.getConstant(Mask, DL, VecTy, true), 1674 Op->getOperand(2), Op->getOperand(1)); 1675 } 1676 case Intrinsic::mips_bmnz_v: 1677 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1678 Op->getOperand(2), Op->getOperand(1)); 1679 case 
Intrinsic::mips_bmnzi_b: 1680 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1681 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2), 1682 Op->getOperand(1)); 1683 case Intrinsic::mips_bmz_v: 1684 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3), 1685 Op->getOperand(1), Op->getOperand(2)); 1686 case Intrinsic::mips_bmzi_b: 1687 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1688 lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1), 1689 Op->getOperand(2)); 1690 case Intrinsic::mips_bneg_b: 1691 case Intrinsic::mips_bneg_h: 1692 case Intrinsic::mips_bneg_w: 1693 case Intrinsic::mips_bneg_d: { 1694 EVT VecTy = Op->getValueType(0); 1695 SDValue One = DAG.getConstant(1, DL, VecTy); 1696 1697 return DAG.getNode(ISD::XOR, DL, VecTy, Op->getOperand(1), 1698 DAG.getNode(ISD::SHL, DL, VecTy, One, 1699 truncateVecElts(Op, DAG))); 1700 } 1701 case Intrinsic::mips_bnegi_b: 1702 case Intrinsic::mips_bnegi_h: 1703 case Intrinsic::mips_bnegi_w: 1704 case Intrinsic::mips_bnegi_d: 1705 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::XOR, Op->getOperand(2), 1706 !Subtarget.isLittle()); 1707 case Intrinsic::mips_bnz_b: 1708 case Intrinsic::mips_bnz_h: 1709 case Intrinsic::mips_bnz_w: 1710 case Intrinsic::mips_bnz_d: 1711 return DAG.getNode(MipsISD::VALL_NONZERO, DL, Op->getValueType(0), 1712 Op->getOperand(1)); 1713 case Intrinsic::mips_bnz_v: 1714 return DAG.getNode(MipsISD::VANY_NONZERO, DL, Op->getValueType(0), 1715 Op->getOperand(1)); 1716 case Intrinsic::mips_bsel_v: 1717 // bsel_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1718 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1719 Op->getOperand(1), Op->getOperand(3), 1720 Op->getOperand(2)); 1721 case Intrinsic::mips_bseli_b: 1722 // bseli_v(Mask, IfClear, IfSet) -> (vselect Mask, IfSet, IfClear) 1723 return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), 1724 Op->getOperand(1), lowerMSASplatImm(Op, 3, DAG), 1725 Op->getOperand(2)); 1726 case 
Intrinsic::mips_bset_b: 1727 case Intrinsic::mips_bset_h: 1728 case Intrinsic::mips_bset_w: 1729 case Intrinsic::mips_bset_d: { 1730 EVT VecTy = Op->getValueType(0); 1731 SDValue One = DAG.getConstant(1, DL, VecTy); 1732 1733 return DAG.getNode(ISD::OR, DL, VecTy, Op->getOperand(1), 1734 DAG.getNode(ISD::SHL, DL, VecTy, One, 1735 truncateVecElts(Op, DAG))); 1736 } 1737 case Intrinsic::mips_bseti_b: 1738 case Intrinsic::mips_bseti_h: 1739 case Intrinsic::mips_bseti_w: 1740 case Intrinsic::mips_bseti_d: 1741 return lowerMSABinaryBitImmIntr(Op, DAG, ISD::OR, Op->getOperand(2), 1742 !Subtarget.isLittle()); 1743 case Intrinsic::mips_bz_b: 1744 case Intrinsic::mips_bz_h: 1745 case Intrinsic::mips_bz_w: 1746 case Intrinsic::mips_bz_d: 1747 return DAG.getNode(MipsISD::VALL_ZERO, DL, Op->getValueType(0), 1748 Op->getOperand(1)); 1749 case Intrinsic::mips_bz_v: 1750 return DAG.getNode(MipsISD::VANY_ZERO, DL, Op->getValueType(0), 1751 Op->getOperand(1)); 1752 case Intrinsic::mips_ceq_b: 1753 case Intrinsic::mips_ceq_h: 1754 case Intrinsic::mips_ceq_w: 1755 case Intrinsic::mips_ceq_d: 1756 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1757 Op->getOperand(2), ISD::SETEQ); 1758 case Intrinsic::mips_ceqi_b: 1759 case Intrinsic::mips_ceqi_h: 1760 case Intrinsic::mips_ceqi_w: 1761 case Intrinsic::mips_ceqi_d: 1762 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1763 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETEQ); 1764 case Intrinsic::mips_cle_s_b: 1765 case Intrinsic::mips_cle_s_h: 1766 case Intrinsic::mips_cle_s_w: 1767 case Intrinsic::mips_cle_s_d: 1768 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1769 Op->getOperand(2), ISD::SETLE); 1770 case Intrinsic::mips_clei_s_b: 1771 case Intrinsic::mips_clei_s_h: 1772 case Intrinsic::mips_clei_s_w: 1773 case Intrinsic::mips_clei_s_d: 1774 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1775 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLE); 1776 case 
Intrinsic::mips_cle_u_b: 1777 case Intrinsic::mips_cle_u_h: 1778 case Intrinsic::mips_cle_u_w: 1779 case Intrinsic::mips_cle_u_d: 1780 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1781 Op->getOperand(2), ISD::SETULE); 1782 case Intrinsic::mips_clei_u_b: 1783 case Intrinsic::mips_clei_u_h: 1784 case Intrinsic::mips_clei_u_w: 1785 case Intrinsic::mips_clei_u_d: 1786 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1787 lowerMSASplatImm(Op, 2, DAG), ISD::SETULE); 1788 case Intrinsic::mips_clt_s_b: 1789 case Intrinsic::mips_clt_s_h: 1790 case Intrinsic::mips_clt_s_w: 1791 case Intrinsic::mips_clt_s_d: 1792 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1793 Op->getOperand(2), ISD::SETLT); 1794 case Intrinsic::mips_clti_s_b: 1795 case Intrinsic::mips_clti_s_h: 1796 case Intrinsic::mips_clti_s_w: 1797 case Intrinsic::mips_clti_s_d: 1798 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1799 lowerMSASplatImm(Op, 2, DAG, true), ISD::SETLT); 1800 case Intrinsic::mips_clt_u_b: 1801 case Intrinsic::mips_clt_u_h: 1802 case Intrinsic::mips_clt_u_w: 1803 case Intrinsic::mips_clt_u_d: 1804 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1805 Op->getOperand(2), ISD::SETULT); 1806 case Intrinsic::mips_clti_u_b: 1807 case Intrinsic::mips_clti_u_h: 1808 case Intrinsic::mips_clti_u_w: 1809 case Intrinsic::mips_clti_u_d: 1810 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1811 lowerMSASplatImm(Op, 2, DAG), ISD::SETULT); 1812 case Intrinsic::mips_copy_s_b: 1813 case Intrinsic::mips_copy_s_h: 1814 case Intrinsic::mips_copy_s_w: 1815 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1816 case Intrinsic::mips_copy_s_d: 1817 if (Subtarget.hasMips64()) 1818 // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. 
1819 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); 1820 else { 1821 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1822 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1823 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1824 Op->getValueType(0), Op->getOperand(1), 1825 Op->getOperand(2)); 1826 } 1827 case Intrinsic::mips_copy_u_b: 1828 case Intrinsic::mips_copy_u_h: 1829 case Intrinsic::mips_copy_u_w: 1830 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1831 case Intrinsic::mips_copy_u_d: 1832 if (Subtarget.hasMips64()) 1833 // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. 1834 return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); 1835 else { 1836 // Lower into the generic EXTRACT_VECTOR_ELT node and let the type 1837 // legalizer and EXTRACT_VECTOR_ELT lowering sort it out. 1838 // Note: When i64 is illegal, this results in copy_s.w instructions 1839 // instead of copy_u.w instructions. This makes no difference to the 1840 // behaviour since i64 is only illegal when the register file is 32-bit. 1841 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 1842 Op->getValueType(0), Op->getOperand(1), 1843 Op->getOperand(2)); 1844 } 1845 case Intrinsic::mips_div_s_b: 1846 case Intrinsic::mips_div_s_h: 1847 case Intrinsic::mips_div_s_w: 1848 case Intrinsic::mips_div_s_d: 1849 return DAG.getNode(ISD::SDIV, DL, Op->getValueType(0), Op->getOperand(1), 1850 Op->getOperand(2)); 1851 case Intrinsic::mips_div_u_b: 1852 case Intrinsic::mips_div_u_h: 1853 case Intrinsic::mips_div_u_w: 1854 case Intrinsic::mips_div_u_d: 1855 return DAG.getNode(ISD::UDIV, DL, Op->getValueType(0), Op->getOperand(1), 1856 Op->getOperand(2)); 1857 case Intrinsic::mips_fadd_w: 1858 case Intrinsic::mips_fadd_d: 1859 // TODO: If intrinsics have fast-math-flags, propagate them. 
1860 return DAG.getNode(ISD::FADD, DL, Op->getValueType(0), Op->getOperand(1), 1861 Op->getOperand(2)); 1862 // Don't lower mips_fcaf_[wd] since LLVM folds SETFALSE condcodes away 1863 case Intrinsic::mips_fceq_w: 1864 case Intrinsic::mips_fceq_d: 1865 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1866 Op->getOperand(2), ISD::SETOEQ); 1867 case Intrinsic::mips_fcle_w: 1868 case Intrinsic::mips_fcle_d: 1869 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1870 Op->getOperand(2), ISD::SETOLE); 1871 case Intrinsic::mips_fclt_w: 1872 case Intrinsic::mips_fclt_d: 1873 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1874 Op->getOperand(2), ISD::SETOLT); 1875 case Intrinsic::mips_fcne_w: 1876 case Intrinsic::mips_fcne_d: 1877 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1878 Op->getOperand(2), ISD::SETONE); 1879 case Intrinsic::mips_fcor_w: 1880 case Intrinsic::mips_fcor_d: 1881 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1882 Op->getOperand(2), ISD::SETO); 1883 case Intrinsic::mips_fcueq_w: 1884 case Intrinsic::mips_fcueq_d: 1885 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1886 Op->getOperand(2), ISD::SETUEQ); 1887 case Intrinsic::mips_fcule_w: 1888 case Intrinsic::mips_fcule_d: 1889 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1890 Op->getOperand(2), ISD::SETULE); 1891 case Intrinsic::mips_fcult_w: 1892 case Intrinsic::mips_fcult_d: 1893 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1894 Op->getOperand(2), ISD::SETULT); 1895 case Intrinsic::mips_fcun_w: 1896 case Intrinsic::mips_fcun_d: 1897 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1898 Op->getOperand(2), ISD::SETUO); 1899 case Intrinsic::mips_fcune_w: 1900 case Intrinsic::mips_fcune_d: 1901 return DAG.getSetCC(DL, Op->getValueType(0), Op->getOperand(1), 1902 Op->getOperand(2), ISD::SETUNE); 1903 case Intrinsic::mips_fdiv_w: 1904 case 
Intrinsic::mips_fdiv_d: 1905 // TODO: If intrinsics have fast-math-flags, propagate them. 1906 return DAG.getNode(ISD::FDIV, DL, Op->getValueType(0), Op->getOperand(1), 1907 Op->getOperand(2)); 1908 case Intrinsic::mips_ffint_u_w: 1909 case Intrinsic::mips_ffint_u_d: 1910 return DAG.getNode(ISD::UINT_TO_FP, DL, Op->getValueType(0), 1911 Op->getOperand(1)); 1912 case Intrinsic::mips_ffint_s_w: 1913 case Intrinsic::mips_ffint_s_d: 1914 return DAG.getNode(ISD::SINT_TO_FP, DL, Op->getValueType(0), 1915 Op->getOperand(1)); 1916 case Intrinsic::mips_fill_b: 1917 case Intrinsic::mips_fill_h: 1918 case Intrinsic::mips_fill_w: 1919 case Intrinsic::mips_fill_d: { 1920 EVT ResTy = Op->getValueType(0); 1921 SmallVector<SDValue, 16> Ops(ResTy.getVectorNumElements(), 1922 Op->getOperand(1)); 1923 1924 // If ResTy is v2i64 then the type legalizer will break this node down into 1925 // an equivalent v4i32. 1926 return DAG.getBuildVector(ResTy, DL, Ops); 1927 } 1928 case Intrinsic::mips_fexp2_w: 1929 case Intrinsic::mips_fexp2_d: { 1930 // TODO: If intrinsics have fast-math-flags, propagate them. 1931 EVT ResTy = Op->getValueType(0); 1932 return DAG.getNode( 1933 ISD::FMUL, SDLoc(Op), ResTy, Op->getOperand(1), 1934 DAG.getNode(ISD::FEXP2, SDLoc(Op), ResTy, Op->getOperand(2))); 1935 } 1936 case Intrinsic::mips_flog2_w: 1937 case Intrinsic::mips_flog2_d: 1938 return DAG.getNode(ISD::FLOG2, DL, Op->getValueType(0), Op->getOperand(1)); 1939 case Intrinsic::mips_fmadd_w: 1940 case Intrinsic::mips_fmadd_d: 1941 return DAG.getNode(ISD::FMA, SDLoc(Op), Op->getValueType(0), 1942 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1943 case Intrinsic::mips_fmul_w: 1944 case Intrinsic::mips_fmul_d: 1945 // TODO: If intrinsics have fast-math-flags, propagate them. 
1946 return DAG.getNode(ISD::FMUL, DL, Op->getValueType(0), Op->getOperand(1), 1947 Op->getOperand(2)); 1948 case Intrinsic::mips_fmsub_w: 1949 case Intrinsic::mips_fmsub_d: { 1950 // TODO: If intrinsics have fast-math-flags, propagate them. 1951 return DAG.getNode(MipsISD::FMS, SDLoc(Op), Op->getValueType(0), 1952 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 1953 } 1954 case Intrinsic::mips_frint_w: 1955 case Intrinsic::mips_frint_d: 1956 return DAG.getNode(ISD::FRINT, DL, Op->getValueType(0), Op->getOperand(1)); 1957 case Intrinsic::mips_fsqrt_w: 1958 case Intrinsic::mips_fsqrt_d: 1959 return DAG.getNode(ISD::FSQRT, DL, Op->getValueType(0), Op->getOperand(1)); 1960 case Intrinsic::mips_fsub_w: 1961 case Intrinsic::mips_fsub_d: 1962 // TODO: If intrinsics have fast-math-flags, propagate them. 1963 return DAG.getNode(ISD::FSUB, DL, Op->getValueType(0), Op->getOperand(1), 1964 Op->getOperand(2)); 1965 case Intrinsic::mips_ftrunc_u_w: 1966 case Intrinsic::mips_ftrunc_u_d: 1967 return DAG.getNode(ISD::FP_TO_UINT, DL, Op->getValueType(0), 1968 Op->getOperand(1)); 1969 case Intrinsic::mips_ftrunc_s_w: 1970 case Intrinsic::mips_ftrunc_s_d: 1971 return DAG.getNode(ISD::FP_TO_SINT, DL, Op->getValueType(0), 1972 Op->getOperand(1)); 1973 case Intrinsic::mips_ilvev_b: 1974 case Intrinsic::mips_ilvev_h: 1975 case Intrinsic::mips_ilvev_w: 1976 case Intrinsic::mips_ilvev_d: 1977 return DAG.getNode(MipsISD::ILVEV, DL, Op->getValueType(0), 1978 Op->getOperand(1), Op->getOperand(2)); 1979 case Intrinsic::mips_ilvl_b: 1980 case Intrinsic::mips_ilvl_h: 1981 case Intrinsic::mips_ilvl_w: 1982 case Intrinsic::mips_ilvl_d: 1983 return DAG.getNode(MipsISD::ILVL, DL, Op->getValueType(0), 1984 Op->getOperand(1), Op->getOperand(2)); 1985 case Intrinsic::mips_ilvod_b: 1986 case Intrinsic::mips_ilvod_h: 1987 case Intrinsic::mips_ilvod_w: 1988 case Intrinsic::mips_ilvod_d: 1989 return DAG.getNode(MipsISD::ILVOD, DL, Op->getValueType(0), 1990 Op->getOperand(1), Op->getOperand(2)); 
1991 case Intrinsic::mips_ilvr_b: 1992 case Intrinsic::mips_ilvr_h: 1993 case Intrinsic::mips_ilvr_w: 1994 case Intrinsic::mips_ilvr_d: 1995 return DAG.getNode(MipsISD::ILVR, DL, Op->getValueType(0), 1996 Op->getOperand(1), Op->getOperand(2)); 1997 case Intrinsic::mips_insert_b: 1998 case Intrinsic::mips_insert_h: 1999 case Intrinsic::mips_insert_w: 2000 case Intrinsic::mips_insert_d: 2001 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), Op->getValueType(0), 2002 Op->getOperand(1), Op->getOperand(3), Op->getOperand(2)); 2003 case Intrinsic::mips_insve_b: 2004 case Intrinsic::mips_insve_h: 2005 case Intrinsic::mips_insve_w: 2006 case Intrinsic::mips_insve_d: { 2007 // Report an error for out of range values. 2008 int64_t Max; 2009 switch (Intrinsic) { 2010 case Intrinsic::mips_insve_b: Max = 15; break; 2011 case Intrinsic::mips_insve_h: Max = 7; break; 2012 case Intrinsic::mips_insve_w: Max = 3; break; 2013 case Intrinsic::mips_insve_d: Max = 1; break; 2014 default: llvm_unreachable("Unmatched intrinsic"); 2015 } 2016 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2017 if (Value < 0 || Value > Max) 2018 report_fatal_error("Immediate out of range"); 2019 return DAG.getNode(MipsISD::INSVE, DL, Op->getValueType(0), 2020 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3), 2021 DAG.getConstant(0, DL, MVT::i32)); 2022 } 2023 case Intrinsic::mips_ldi_b: 2024 case Intrinsic::mips_ldi_h: 2025 case Intrinsic::mips_ldi_w: 2026 case Intrinsic::mips_ldi_d: 2027 return lowerMSASplatImm(Op, 1, DAG, true); 2028 case Intrinsic::mips_lsa: 2029 case Intrinsic::mips_dlsa: { 2030 EVT ResTy = Op->getValueType(0); 2031 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 2032 DAG.getNode(ISD::SHL, SDLoc(Op), ResTy, 2033 Op->getOperand(2), Op->getOperand(3))); 2034 } 2035 case Intrinsic::mips_maddv_b: 2036 case Intrinsic::mips_maddv_h: 2037 case Intrinsic::mips_maddv_w: 2038 case Intrinsic::mips_maddv_d: { 2039 EVT ResTy = 
Op->getValueType(0); 2040 return DAG.getNode(ISD::ADD, SDLoc(Op), ResTy, Op->getOperand(1), 2041 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2042 Op->getOperand(2), Op->getOperand(3))); 2043 } 2044 case Intrinsic::mips_max_s_b: 2045 case Intrinsic::mips_max_s_h: 2046 case Intrinsic::mips_max_s_w: 2047 case Intrinsic::mips_max_s_d: 2048 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2049 Op->getOperand(1), Op->getOperand(2)); 2050 case Intrinsic::mips_max_u_b: 2051 case Intrinsic::mips_max_u_h: 2052 case Intrinsic::mips_max_u_w: 2053 case Intrinsic::mips_max_u_d: 2054 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2055 Op->getOperand(1), Op->getOperand(2)); 2056 case Intrinsic::mips_maxi_s_b: 2057 case Intrinsic::mips_maxi_s_h: 2058 case Intrinsic::mips_maxi_s_w: 2059 case Intrinsic::mips_maxi_s_d: 2060 return DAG.getNode(ISD::SMAX, DL, Op->getValueType(0), 2061 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2062 case Intrinsic::mips_maxi_u_b: 2063 case Intrinsic::mips_maxi_u_h: 2064 case Intrinsic::mips_maxi_u_w: 2065 case Intrinsic::mips_maxi_u_d: 2066 return DAG.getNode(ISD::UMAX, DL, Op->getValueType(0), 2067 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2068 case Intrinsic::mips_min_s_b: 2069 case Intrinsic::mips_min_s_h: 2070 case Intrinsic::mips_min_s_w: 2071 case Intrinsic::mips_min_s_d: 2072 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2073 Op->getOperand(1), Op->getOperand(2)); 2074 case Intrinsic::mips_min_u_b: 2075 case Intrinsic::mips_min_u_h: 2076 case Intrinsic::mips_min_u_w: 2077 case Intrinsic::mips_min_u_d: 2078 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2079 Op->getOperand(1), Op->getOperand(2)); 2080 case Intrinsic::mips_mini_s_b: 2081 case Intrinsic::mips_mini_s_h: 2082 case Intrinsic::mips_mini_s_w: 2083 case Intrinsic::mips_mini_s_d: 2084 return DAG.getNode(ISD::SMIN, DL, Op->getValueType(0), 2085 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG, true)); 2086 case Intrinsic::mips_mini_u_b: 2087 case 
Intrinsic::mips_mini_u_h: 2088 case Intrinsic::mips_mini_u_w: 2089 case Intrinsic::mips_mini_u_d: 2090 return DAG.getNode(ISD::UMIN, DL, Op->getValueType(0), 2091 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2092 case Intrinsic::mips_mod_s_b: 2093 case Intrinsic::mips_mod_s_h: 2094 case Intrinsic::mips_mod_s_w: 2095 case Intrinsic::mips_mod_s_d: 2096 return DAG.getNode(ISD::SREM, DL, Op->getValueType(0), Op->getOperand(1), 2097 Op->getOperand(2)); 2098 case Intrinsic::mips_mod_u_b: 2099 case Intrinsic::mips_mod_u_h: 2100 case Intrinsic::mips_mod_u_w: 2101 case Intrinsic::mips_mod_u_d: 2102 return DAG.getNode(ISD::UREM, DL, Op->getValueType(0), Op->getOperand(1), 2103 Op->getOperand(2)); 2104 case Intrinsic::mips_mulv_b: 2105 case Intrinsic::mips_mulv_h: 2106 case Intrinsic::mips_mulv_w: 2107 case Intrinsic::mips_mulv_d: 2108 return DAG.getNode(ISD::MUL, DL, Op->getValueType(0), Op->getOperand(1), 2109 Op->getOperand(2)); 2110 case Intrinsic::mips_msubv_b: 2111 case Intrinsic::mips_msubv_h: 2112 case Intrinsic::mips_msubv_w: 2113 case Intrinsic::mips_msubv_d: { 2114 EVT ResTy = Op->getValueType(0); 2115 return DAG.getNode(ISD::SUB, SDLoc(Op), ResTy, Op->getOperand(1), 2116 DAG.getNode(ISD::MUL, SDLoc(Op), ResTy, 2117 Op->getOperand(2), Op->getOperand(3))); 2118 } 2119 case Intrinsic::mips_nlzc_b: 2120 case Intrinsic::mips_nlzc_h: 2121 case Intrinsic::mips_nlzc_w: 2122 case Intrinsic::mips_nlzc_d: 2123 return DAG.getNode(ISD::CTLZ, DL, Op->getValueType(0), Op->getOperand(1)); 2124 case Intrinsic::mips_nor_v: { 2125 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2126 Op->getOperand(1), Op->getOperand(2)); 2127 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2128 } 2129 case Intrinsic::mips_nori_b: { 2130 SDValue Res = DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2131 Op->getOperand(1), 2132 lowerMSASplatImm(Op, 2, DAG)); 2133 return DAG.getNOT(DL, Res, Res->getValueType(0)); 2134 } 2135 case Intrinsic::mips_or_v: 2136 return DAG.getNode(ISD::OR, 
DL, Op->getValueType(0), Op->getOperand(1), 2137 Op->getOperand(2)); 2138 case Intrinsic::mips_ori_b: 2139 return DAG.getNode(ISD::OR, DL, Op->getValueType(0), 2140 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2141 case Intrinsic::mips_pckev_b: 2142 case Intrinsic::mips_pckev_h: 2143 case Intrinsic::mips_pckev_w: 2144 case Intrinsic::mips_pckev_d: 2145 return DAG.getNode(MipsISD::PCKEV, DL, Op->getValueType(0), 2146 Op->getOperand(1), Op->getOperand(2)); 2147 case Intrinsic::mips_pckod_b: 2148 case Intrinsic::mips_pckod_h: 2149 case Intrinsic::mips_pckod_w: 2150 case Intrinsic::mips_pckod_d: 2151 return DAG.getNode(MipsISD::PCKOD, DL, Op->getValueType(0), 2152 Op->getOperand(1), Op->getOperand(2)); 2153 case Intrinsic::mips_pcnt_b: 2154 case Intrinsic::mips_pcnt_h: 2155 case Intrinsic::mips_pcnt_w: 2156 case Intrinsic::mips_pcnt_d: 2157 return DAG.getNode(ISD::CTPOP, DL, Op->getValueType(0), Op->getOperand(1)); 2158 case Intrinsic::mips_sat_s_b: 2159 case Intrinsic::mips_sat_s_h: 2160 case Intrinsic::mips_sat_s_w: 2161 case Intrinsic::mips_sat_s_d: 2162 case Intrinsic::mips_sat_u_b: 2163 case Intrinsic::mips_sat_u_h: 2164 case Intrinsic::mips_sat_u_w: 2165 case Intrinsic::mips_sat_u_d: { 2166 // Report an error for out of range values. 
2167 int64_t Max; 2168 switch (Intrinsic) { 2169 case Intrinsic::mips_sat_s_b: 2170 case Intrinsic::mips_sat_u_b: Max = 7; break; 2171 case Intrinsic::mips_sat_s_h: 2172 case Intrinsic::mips_sat_u_h: Max = 15; break; 2173 case Intrinsic::mips_sat_s_w: 2174 case Intrinsic::mips_sat_u_w: Max = 31; break; 2175 case Intrinsic::mips_sat_s_d: 2176 case Intrinsic::mips_sat_u_d: Max = 63; break; 2177 default: llvm_unreachable("Unmatched intrinsic"); 2178 } 2179 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2180 if (Value < 0 || Value > Max) 2181 report_fatal_error("Immediate out of range"); 2182 return SDValue(); 2183 } 2184 case Intrinsic::mips_shf_b: 2185 case Intrinsic::mips_shf_h: 2186 case Intrinsic::mips_shf_w: { 2187 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2188 if (Value < 0 || Value > 255) 2189 report_fatal_error("Immediate out of range"); 2190 return DAG.getNode(MipsISD::SHF, DL, Op->getValueType(0), 2191 Op->getOperand(2), Op->getOperand(1)); 2192 } 2193 case Intrinsic::mips_sldi_b: 2194 case Intrinsic::mips_sldi_h: 2195 case Intrinsic::mips_sldi_w: 2196 case Intrinsic::mips_sldi_d: { 2197 // Report an error for out of range values. 
2198 int64_t Max; 2199 switch (Intrinsic) { 2200 case Intrinsic::mips_sldi_b: Max = 15; break; 2201 case Intrinsic::mips_sldi_h: Max = 7; break; 2202 case Intrinsic::mips_sldi_w: Max = 3; break; 2203 case Intrinsic::mips_sldi_d: Max = 1; break; 2204 default: llvm_unreachable("Unmatched intrinsic"); 2205 } 2206 int64_t Value = cast<ConstantSDNode>(Op->getOperand(3))->getSExtValue(); 2207 if (Value < 0 || Value > Max) 2208 report_fatal_error("Immediate out of range"); 2209 return SDValue(); 2210 } 2211 case Intrinsic::mips_sll_b: 2212 case Intrinsic::mips_sll_h: 2213 case Intrinsic::mips_sll_w: 2214 case Intrinsic::mips_sll_d: 2215 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), Op->getOperand(1), 2216 truncateVecElts(Op, DAG)); 2217 case Intrinsic::mips_slli_b: 2218 case Intrinsic::mips_slli_h: 2219 case Intrinsic::mips_slli_w: 2220 case Intrinsic::mips_slli_d: 2221 return DAG.getNode(ISD::SHL, DL, Op->getValueType(0), 2222 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2223 case Intrinsic::mips_splat_b: 2224 case Intrinsic::mips_splat_h: 2225 case Intrinsic::mips_splat_w: 2226 case Intrinsic::mips_splat_d: 2227 // We can't lower via VECTOR_SHUFFLE because it requires constant shuffle 2228 // masks, nor can we lower via BUILD_VECTOR & EXTRACT_VECTOR_ELT because 2229 // EXTRACT_VECTOR_ELT can't extract i64's on MIPS32. 2230 // Instead we lower to MipsISD::VSHF and match from there. 
2231 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2232 lowerMSASplatZExt(Op, 2, DAG), Op->getOperand(1), 2233 Op->getOperand(1)); 2234 case Intrinsic::mips_splati_b: 2235 case Intrinsic::mips_splati_h: 2236 case Intrinsic::mips_splati_w: 2237 case Intrinsic::mips_splati_d: 2238 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2239 lowerMSASplatImm(Op, 2, DAG), Op->getOperand(1), 2240 Op->getOperand(1)); 2241 case Intrinsic::mips_sra_b: 2242 case Intrinsic::mips_sra_h: 2243 case Intrinsic::mips_sra_w: 2244 case Intrinsic::mips_sra_d: 2245 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), Op->getOperand(1), 2246 truncateVecElts(Op, DAG)); 2247 case Intrinsic::mips_srai_b: 2248 case Intrinsic::mips_srai_h: 2249 case Intrinsic::mips_srai_w: 2250 case Intrinsic::mips_srai_d: 2251 return DAG.getNode(ISD::SRA, DL, Op->getValueType(0), 2252 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2253 case Intrinsic::mips_srari_b: 2254 case Intrinsic::mips_srari_h: 2255 case Intrinsic::mips_srari_w: 2256 case Intrinsic::mips_srari_d: { 2257 // Report an error for out of range values. 
2258 int64_t Max; 2259 switch (Intrinsic) { 2260 case Intrinsic::mips_srari_b: Max = 7; break; 2261 case Intrinsic::mips_srari_h: Max = 15; break; 2262 case Intrinsic::mips_srari_w: Max = 31; break; 2263 case Intrinsic::mips_srari_d: Max = 63; break; 2264 default: llvm_unreachable("Unmatched intrinsic"); 2265 } 2266 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2267 if (Value < 0 || Value > Max) 2268 report_fatal_error("Immediate out of range"); 2269 return SDValue(); 2270 } 2271 case Intrinsic::mips_srl_b: 2272 case Intrinsic::mips_srl_h: 2273 case Intrinsic::mips_srl_w: 2274 case Intrinsic::mips_srl_d: 2275 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), Op->getOperand(1), 2276 truncateVecElts(Op, DAG)); 2277 case Intrinsic::mips_srli_b: 2278 case Intrinsic::mips_srli_h: 2279 case Intrinsic::mips_srli_w: 2280 case Intrinsic::mips_srli_d: 2281 return DAG.getNode(ISD::SRL, DL, Op->getValueType(0), 2282 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2283 case Intrinsic::mips_srlri_b: 2284 case Intrinsic::mips_srlri_h: 2285 case Intrinsic::mips_srlri_w: 2286 case Intrinsic::mips_srlri_d: { 2287 // Report an error for out of range values. 
2288 int64_t Max; 2289 switch (Intrinsic) { 2290 case Intrinsic::mips_srlri_b: Max = 7; break; 2291 case Intrinsic::mips_srlri_h: Max = 15; break; 2292 case Intrinsic::mips_srlri_w: Max = 31; break; 2293 case Intrinsic::mips_srlri_d: Max = 63; break; 2294 default: llvm_unreachable("Unmatched intrinsic"); 2295 } 2296 int64_t Value = cast<ConstantSDNode>(Op->getOperand(2))->getSExtValue(); 2297 if (Value < 0 || Value > Max) 2298 report_fatal_error("Immediate out of range"); 2299 return SDValue(); 2300 } 2301 case Intrinsic::mips_subv_b: 2302 case Intrinsic::mips_subv_h: 2303 case Intrinsic::mips_subv_w: 2304 case Intrinsic::mips_subv_d: 2305 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), Op->getOperand(1), 2306 Op->getOperand(2)); 2307 case Intrinsic::mips_subvi_b: 2308 case Intrinsic::mips_subvi_h: 2309 case Intrinsic::mips_subvi_w: 2310 case Intrinsic::mips_subvi_d: 2311 return DAG.getNode(ISD::SUB, DL, Op->getValueType(0), 2312 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2313 case Intrinsic::mips_vshf_b: 2314 case Intrinsic::mips_vshf_h: 2315 case Intrinsic::mips_vshf_w: 2316 case Intrinsic::mips_vshf_d: 2317 return DAG.getNode(MipsISD::VSHF, DL, Op->getValueType(0), 2318 Op->getOperand(1), Op->getOperand(2), Op->getOperand(3)); 2319 case Intrinsic::mips_xor_v: 2320 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), Op->getOperand(1), 2321 Op->getOperand(2)); 2322 case Intrinsic::mips_xori_b: 2323 return DAG.getNode(ISD::XOR, DL, Op->getValueType(0), 2324 Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); 2325 case Intrinsic::thread_pointer: { 2326 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 2327 return DAG.getNode(MipsISD::ThreadPointer, DL, PtrVT); 2328 } 2329 } 2330 } 2331 2332 static SDValue lowerMSALoadIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2333 const MipsSubtarget &Subtarget) { 2334 SDLoc DL(Op); 2335 SDValue ChainIn = Op->getOperand(0); 2336 SDValue Address = Op->getOperand(2); 2337 SDValue Offset = Op->getOperand(3); 2338 EVT 
ResTy = Op->getValueType(0); 2339 EVT PtrTy = Address->getValueType(0); 2340 2341 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2342 // however takes an i32 signed constant offset. The actual type of the 2343 // intrinsic is a scaled signed i10. 2344 if (Subtarget.isABI_N64()) 2345 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2346 2347 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2348 return DAG.getLoad(ResTy, DL, ChainIn, Address, MachinePointerInfo(), 2349 Align(16)); 2350 } 2351 2352 SDValue MipsSETargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 2353 SelectionDAG &DAG) const { 2354 unsigned Intr = Op->getConstantOperandVal(1); 2355 switch (Intr) { 2356 default: 2357 return SDValue(); 2358 case Intrinsic::mips_extp: 2359 return lowerDSPIntr(Op, DAG, MipsISD::EXTP); 2360 case Intrinsic::mips_extpdp: 2361 return lowerDSPIntr(Op, DAG, MipsISD::EXTPDP); 2362 case Intrinsic::mips_extr_w: 2363 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_W); 2364 case Intrinsic::mips_extr_r_w: 2365 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_R_W); 2366 case Intrinsic::mips_extr_rs_w: 2367 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_RS_W); 2368 case Intrinsic::mips_extr_s_h: 2369 return lowerDSPIntr(Op, DAG, MipsISD::EXTR_S_H); 2370 case Intrinsic::mips_mthlip: 2371 return lowerDSPIntr(Op, DAG, MipsISD::MTHLIP); 2372 case Intrinsic::mips_mulsaq_s_w_ph: 2373 return lowerDSPIntr(Op, DAG, MipsISD::MULSAQ_S_W_PH); 2374 case Intrinsic::mips_maq_s_w_phl: 2375 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHL); 2376 case Intrinsic::mips_maq_s_w_phr: 2377 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_S_W_PHR); 2378 case Intrinsic::mips_maq_sa_w_phl: 2379 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHL); 2380 case Intrinsic::mips_maq_sa_w_phr: 2381 return lowerDSPIntr(Op, DAG, MipsISD::MAQ_SA_W_PHR); 2382 case Intrinsic::mips_dpaq_s_w_ph: 2383 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_S_W_PH); 2384 case Intrinsic::mips_dpsq_s_w_ph: 2385 return 
lowerDSPIntr(Op, DAG, MipsISD::DPSQ_S_W_PH); 2386 case Intrinsic::mips_dpaq_sa_l_w: 2387 return lowerDSPIntr(Op, DAG, MipsISD::DPAQ_SA_L_W); 2388 case Intrinsic::mips_dpsq_sa_l_w: 2389 return lowerDSPIntr(Op, DAG, MipsISD::DPSQ_SA_L_W); 2390 case Intrinsic::mips_dpaqx_s_w_ph: 2391 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_S_W_PH); 2392 case Intrinsic::mips_dpaqx_sa_w_ph: 2393 return lowerDSPIntr(Op, DAG, MipsISD::DPAQX_SA_W_PH); 2394 case Intrinsic::mips_dpsqx_s_w_ph: 2395 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_S_W_PH); 2396 case Intrinsic::mips_dpsqx_sa_w_ph: 2397 return lowerDSPIntr(Op, DAG, MipsISD::DPSQX_SA_W_PH); 2398 case Intrinsic::mips_ld_b: 2399 case Intrinsic::mips_ld_h: 2400 case Intrinsic::mips_ld_w: 2401 case Intrinsic::mips_ld_d: 2402 return lowerMSALoadIntr(Op, DAG, Intr, Subtarget); 2403 } 2404 } 2405 2406 static SDValue lowerMSAStoreIntr(SDValue Op, SelectionDAG &DAG, unsigned Intr, 2407 const MipsSubtarget &Subtarget) { 2408 SDLoc DL(Op); 2409 SDValue ChainIn = Op->getOperand(0); 2410 SDValue Value = Op->getOperand(2); 2411 SDValue Address = Op->getOperand(3); 2412 SDValue Offset = Op->getOperand(4); 2413 EVT PtrTy = Address->getValueType(0); 2414 2415 // For N64 addresses have the underlying type MVT::i64. This intrinsic 2416 // however takes an i32 signed constant offset. The actual type of the 2417 // intrinsic is a scaled signed i10. 
2418 if (Subtarget.isABI_N64()) 2419 Offset = DAG.getNode(ISD::SIGN_EXTEND, DL, PtrTy, Offset); 2420 2421 Address = DAG.getNode(ISD::ADD, DL, PtrTy, Address, Offset); 2422 2423 return DAG.getStore(ChainIn, DL, Value, Address, MachinePointerInfo(), 2424 Align(16)); 2425 } 2426 2427 SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op, 2428 SelectionDAG &DAG) const { 2429 unsigned Intr = Op->getConstantOperandVal(1); 2430 switch (Intr) { 2431 default: 2432 return SDValue(); 2433 case Intrinsic::mips_st_b: 2434 case Intrinsic::mips_st_h: 2435 case Intrinsic::mips_st_w: 2436 case Intrinsic::mips_st_d: 2437 return lowerMSAStoreIntr(Op, DAG, Intr, Subtarget); 2438 } 2439 } 2440 2441 // Lower ISD::EXTRACT_VECTOR_ELT into MipsISD::VEXTRACT_SEXT_ELT. 2442 // 2443 // The non-value bits resulting from ISD::EXTRACT_VECTOR_ELT are undefined. We 2444 // choose to sign-extend but we could have equally chosen zero-extend. The 2445 // DAGCombiner will fold any sign/zero extension of the ISD::EXTRACT_VECTOR_ELT 2446 // result into this node later (possibly changing it to a zero-extend in the 2447 // process). 
2448 SDValue MipsSETargetLowering:: 2449 lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const { 2450 SDLoc DL(Op); 2451 EVT ResTy = Op->getValueType(0); 2452 SDValue Op0 = Op->getOperand(0); 2453 EVT VecTy = Op0->getValueType(0); 2454 2455 if (!VecTy.is128BitVector()) 2456 return SDValue(); 2457 2458 if (ResTy.isInteger()) { 2459 SDValue Op1 = Op->getOperand(1); 2460 EVT EltTy = VecTy.getVectorElementType(); 2461 return DAG.getNode(MipsISD::VEXTRACT_SEXT_ELT, DL, ResTy, Op0, Op1, 2462 DAG.getValueType(EltTy)); 2463 } 2464 2465 return Op; 2466 } 2467 2468 static bool isConstantOrUndef(const SDValue Op) { 2469 if (Op->isUndef()) 2470 return true; 2471 if (isa<ConstantSDNode>(Op)) 2472 return true; 2473 if (isa<ConstantFPSDNode>(Op)) 2474 return true; 2475 return false; 2476 } 2477 2478 static bool isConstantOrUndefBUILD_VECTOR(const BuildVectorSDNode *Op) { 2479 for (unsigned i = 0; i < Op->getNumOperands(); ++i) 2480 if (isConstantOrUndef(Op->getOperand(i))) 2481 return true; 2482 return false; 2483 } 2484 2485 // Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the 2486 // backend. 2487 // 2488 // Lowers according to the following rules: 2489 // - Constant splats are legal as-is as long as the SplatBitSize is a power of 2490 // 2 less than or equal to 64 and the value fits into a signed 10-bit 2491 // immediate 2492 // - Constant splats are lowered to bitconverted BUILD_VECTORs if SplatBitSize 2493 // is a power of 2 less than or equal to 64 and the value does not fit into a 2494 // signed 10-bit immediate 2495 // - Non-constant splats are legal as-is. 2496 // - Non-constant non-splats are lowered to sequences of INSERT_VECTOR_ELT. 2497 // - All others are illegal and must be expanded. 
2498 SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, 2499 SelectionDAG &DAG) const { 2500 BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); 2501 EVT ResTy = Op->getValueType(0); 2502 SDLoc DL(Op); 2503 APInt SplatValue, SplatUndef; 2504 unsigned SplatBitSize; 2505 bool HasAnyUndefs; 2506 2507 if (!Subtarget.hasMSA() || !ResTy.is128BitVector()) 2508 return SDValue(); 2509 2510 if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 2511 HasAnyUndefs, 8, 2512 !Subtarget.isLittle()) && SplatBitSize <= 64) { 2513 // We can only cope with 8, 16, 32, or 64-bit elements 2514 if (SplatBitSize != 8 && SplatBitSize != 16 && SplatBitSize != 32 && 2515 SplatBitSize != 64) 2516 return SDValue(); 2517 2518 // If the value isn't an integer type we will have to bitcast 2519 // from an integer type first. Also, if there are any undefs, we must 2520 // lower them to defined values first. 2521 if (ResTy.isInteger() && !HasAnyUndefs) 2522 return Op; 2523 2524 EVT ViaVecTy; 2525 2526 switch (SplatBitSize) { 2527 default: 2528 return SDValue(); 2529 case 8: 2530 ViaVecTy = MVT::v16i8; 2531 break; 2532 case 16: 2533 ViaVecTy = MVT::v8i16; 2534 break; 2535 case 32: 2536 ViaVecTy = MVT::v4i32; 2537 break; 2538 case 64: 2539 // There's no fill.d to fall back on for 64-bit values 2540 return SDValue(); 2541 } 2542 2543 // SelectionDAG::getConstant will promote SplatValue appropriately. 2544 SDValue Result = DAG.getConstant(SplatValue, DL, ViaVecTy); 2545 2546 // Bitcast to the type we originally wanted 2547 if (ViaVecTy != ResTy) 2548 Result = DAG.getNode(ISD::BITCAST, SDLoc(Node), ResTy, Result); 2549 2550 return Result; 2551 } else if (DAG.isSplatValue(Op, /* AllowUndefs */ false)) 2552 return Op; 2553 else if (!isConstantOrUndefBUILD_VECTOR(Node)) { 2554 // Use INSERT_VECTOR_ELT operations rather than expand to stores. 
2555 // The resulting code is the same length as the expansion, but it doesn't 2556 // use memory operations 2557 EVT ResTy = Node->getValueType(0); 2558 2559 assert(ResTy.isVector()); 2560 2561 unsigned NumElts = ResTy.getVectorNumElements(); 2562 SDValue Vector = DAG.getUNDEF(ResTy); 2563 for (unsigned i = 0; i < NumElts; ++i) { 2564 Vector = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ResTy, Vector, 2565 Node->getOperand(i), 2566 DAG.getConstant(i, DL, MVT::i32)); 2567 } 2568 return Vector; 2569 } 2570 2571 return SDValue(); 2572 } 2573 2574 // Lower VECTOR_SHUFFLE into SHF (if possible). 2575 // 2576 // SHF splits the vector into blocks of four elements, then shuffles these 2577 // elements according to a <4 x i2> constant (encoded as an integer immediate). 2578 // 2579 // It is therefore possible to lower into SHF when the mask takes the form: 2580 // <a, b, c, d, a+4, b+4, c+4, d+4, a+8, b+8, c+8, d+8, ...> 2581 // When undef's appear they are treated as if they were whatever value is 2582 // necessary in order to fit the above forms. 
2583 // 2584 // For example: 2585 // %2 = shufflevector <8 x i16> %0, <8 x i16> undef, 2586 // <8 x i32> <i32 3, i32 2, i32 1, i32 0, 2587 // i32 7, i32 6, i32 5, i32 4> 2588 // is lowered to: 2589 // (SHF_H $w0, $w1, 27) 2590 // where the 27 comes from: 2591 // 3 + (2 << 2) + (1 << 4) + (0 << 6) 2592 static SDValue lowerVECTOR_SHUFFLE_SHF(SDValue Op, EVT ResTy, 2593 SmallVector<int, 16> Indices, 2594 SelectionDAG &DAG) { 2595 int SHFIndices[4] = { -1, -1, -1, -1 }; 2596 2597 if (Indices.size() < 4) 2598 return SDValue(); 2599 2600 for (unsigned i = 0; i < 4; ++i) { 2601 for (unsigned j = i; j < Indices.size(); j += 4) { 2602 int Idx = Indices[j]; 2603 2604 // Convert from vector index to 4-element subvector index 2605 // If an index refers to an element outside of the subvector then give up 2606 if (Idx != -1) { 2607 Idx -= 4 * (j / 4); 2608 if (Idx < 0 || Idx >= 4) 2609 return SDValue(); 2610 } 2611 2612 // If the mask has an undef, replace it with the current index. 2613 // Note that it might still be undef if the current index is also undef 2614 if (SHFIndices[i] == -1) 2615 SHFIndices[i] = Idx; 2616 2617 // Check that non-undef values are the same as in the mask. If they 2618 // aren't then give up 2619 if (!(Idx == -1 || Idx == SHFIndices[i])) 2620 return SDValue(); 2621 } 2622 } 2623 2624 // Calculate the immediate. Replace any remaining undefs with zero 2625 APInt Imm(32, 0); 2626 for (int i = 3; i >= 0; --i) { 2627 int Idx = SHFIndices[i]; 2628 2629 if (Idx == -1) 2630 Idx = 0; 2631 2632 Imm <<= 2; 2633 Imm |= Idx & 0x3; 2634 } 2635 2636 SDLoc DL(Op); 2637 return DAG.getNode(MipsISD::SHF, DL, ResTy, 2638 DAG.getTargetConstant(Imm, DL, MVT::i32), 2639 Op->getOperand(0)); 2640 } 2641 2642 /// Determine whether a range fits a regular pattern of values. 2643 /// This function accounts for the possibility of jumping over the End iterator. 
2644 template <typename ValType> 2645 static bool 2646 fitsRegularPattern(typename SmallVectorImpl<ValType>::const_iterator Begin, 2647 unsigned CheckStride, 2648 typename SmallVectorImpl<ValType>::const_iterator End, 2649 ValType ExpectedIndex, unsigned ExpectedIndexStride) { 2650 auto &I = Begin; 2651 2652 while (I != End) { 2653 if (*I != -1 && *I != ExpectedIndex) 2654 return false; 2655 ExpectedIndex += ExpectedIndexStride; 2656 2657 // Incrementing past End is undefined behaviour so we must increment one 2658 // step at a time and check for End at each step. 2659 for (unsigned n = 0; n < CheckStride && I != End; ++n, ++I) 2660 ; // Empty loop body. 2661 } 2662 return true; 2663 } 2664 2665 // Determine whether VECTOR_SHUFFLE is a SPLATI. 2666 // 2667 // It is a SPLATI when the mask is: 2668 // <x, x, x, ...> 2669 // where x is any valid index. 2670 // 2671 // When undef's appear in the mask they are treated as if they were whatever 2672 // value is necessary in order to fit the above form. 2673 static bool isVECTOR_SHUFFLE_SPLATI(SDValue Op, EVT ResTy, 2674 SmallVector<int, 16> Indices, 2675 SelectionDAG &DAG) { 2676 assert((Indices.size() % 2) == 0); 2677 2678 int SplatIndex = -1; 2679 for (const auto &V : Indices) { 2680 if (V != -1) { 2681 SplatIndex = V; 2682 break; 2683 } 2684 } 2685 2686 return fitsRegularPattern<int>(Indices.begin(), 1, Indices.end(), SplatIndex, 2687 0); 2688 } 2689 2690 // Lower VECTOR_SHUFFLE into ILVEV (if possible). 2691 // 2692 // ILVEV interleaves the even elements from each vector. 2693 // 2694 // It is possible to lower into ILVEV when the mask consists of two of the 2695 // following forms interleaved: 2696 // <0, 2, 4, ...> 2697 // <n, n+2, n+4, ...> 2698 // where n is the number of elements in the vector. 
2699 // For example: 2700 // <0, 0, 2, 2, 4, 4, ...> 2701 // <0, n, 2, n+2, 4, n+4, ...> 2702 // 2703 // When undef's appear in the mask they are treated as if they were whatever 2704 // value is necessary in order to fit the above forms. 2705 static SDValue lowerVECTOR_SHUFFLE_ILVEV(SDValue Op, EVT ResTy, 2706 SmallVector<int, 16> Indices, 2707 SelectionDAG &DAG) { 2708 assert((Indices.size() % 2) == 0); 2709 2710 SDValue Wt; 2711 SDValue Ws; 2712 const auto &Begin = Indices.begin(); 2713 const auto &End = Indices.end(); 2714 2715 // Check even elements are taken from the even elements of one half or the 2716 // other and pick an operand accordingly. 2717 if (fitsRegularPattern<int>(Begin, 2, End, 0, 2)) 2718 Wt = Op->getOperand(0); 2719 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 2)) 2720 Wt = Op->getOperand(1); 2721 else 2722 return SDValue(); 2723 2724 // Check odd elements are taken from the even elements of one half or the 2725 // other and pick an operand accordingly. 2726 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 2)) 2727 Ws = Op->getOperand(0); 2728 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 2)) 2729 Ws = Op->getOperand(1); 2730 else 2731 return SDValue(); 2732 2733 return DAG.getNode(MipsISD::ILVEV, SDLoc(Op), ResTy, Ws, Wt); 2734 } 2735 2736 // Lower VECTOR_SHUFFLE into ILVOD (if possible). 2737 // 2738 // ILVOD interleaves the odd elements from each vector. 2739 // 2740 // It is possible to lower into ILVOD when the mask consists of two of the 2741 // following forms interleaved: 2742 // <1, 3, 5, ...> 2743 // <n+1, n+3, n+5, ...> 2744 // where n is the number of elements in the vector. 2745 // For example: 2746 // <1, 1, 3, 3, 5, 5, ...> 2747 // <1, n+1, 3, n+3, 5, n+5, ...> 2748 // 2749 // When undef's appear in the mask they are treated as if they were whatever 2750 // value is necessary in order to fit the above forms. 
2751 static SDValue lowerVECTOR_SHUFFLE_ILVOD(SDValue Op, EVT ResTy, 2752 SmallVector<int, 16> Indices, 2753 SelectionDAG &DAG) { 2754 assert((Indices.size() % 2) == 0); 2755 2756 SDValue Wt; 2757 SDValue Ws; 2758 const auto &Begin = Indices.begin(); 2759 const auto &End = Indices.end(); 2760 2761 // Check even elements are taken from the odd elements of one half or the 2762 // other and pick an operand accordingly. 2763 if (fitsRegularPattern<int>(Begin, 2, End, 1, 2)) 2764 Wt = Op->getOperand(0); 2765 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + 1, 2)) 2766 Wt = Op->getOperand(1); 2767 else 2768 return SDValue(); 2769 2770 // Check odd elements are taken from the odd elements of one half or the 2771 // other and pick an operand accordingly. 2772 if (fitsRegularPattern<int>(Begin + 1, 2, End, 1, 2)) 2773 Ws = Op->getOperand(0); 2774 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + 1, 2)) 2775 Ws = Op->getOperand(1); 2776 else 2777 return SDValue(); 2778 2779 return DAG.getNode(MipsISD::ILVOD, SDLoc(Op), ResTy, Ws, Wt); 2780 } 2781 2782 // Lower VECTOR_SHUFFLE into ILVR (if possible). 2783 // 2784 // ILVR interleaves consecutive elements from the right (lowest-indexed) half of 2785 // each vector. 2786 // 2787 // It is possible to lower into ILVR when the mask consists of two of the 2788 // following forms interleaved: 2789 // <0, 1, 2, ...> 2790 // <n, n+1, n+2, ...> 2791 // where n is the number of elements in the vector. 2792 // For example: 2793 // <0, 0, 1, 1, 2, 2, ...> 2794 // <0, n, 1, n+1, 2, n+2, ...> 2795 // 2796 // When undef's appear in the mask they are treated as if they were whatever 2797 // value is necessary in order to fit the above forms. 
2798 static SDValue lowerVECTOR_SHUFFLE_ILVR(SDValue Op, EVT ResTy, 2799 SmallVector<int, 16> Indices, 2800 SelectionDAG &DAG) { 2801 assert((Indices.size() % 2) == 0); 2802 2803 SDValue Wt; 2804 SDValue Ws; 2805 const auto &Begin = Indices.begin(); 2806 const auto &End = Indices.end(); 2807 2808 // Check even elements are taken from the right (lowest-indexed) elements of 2809 // one half or the other and pick an operand accordingly. 2810 if (fitsRegularPattern<int>(Begin, 2, End, 0, 1)) 2811 Wt = Op->getOperand(0); 2812 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size(), 1)) 2813 Wt = Op->getOperand(1); 2814 else 2815 return SDValue(); 2816 2817 // Check odd elements are taken from the right (lowest-indexed) elements of 2818 // one half or the other and pick an operand accordingly. 2819 if (fitsRegularPattern<int>(Begin + 1, 2, End, 0, 1)) 2820 Ws = Op->getOperand(0); 2821 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size(), 1)) 2822 Ws = Op->getOperand(1); 2823 else 2824 return SDValue(); 2825 2826 return DAG.getNode(MipsISD::ILVR, SDLoc(Op), ResTy, Ws, Wt); 2827 } 2828 2829 // Lower VECTOR_SHUFFLE into ILVL (if possible). 2830 // 2831 // ILVL interleaves consecutive elements from the left (highest-indexed) half 2832 // of each vector. 2833 // 2834 // It is possible to lower into ILVL when the mask consists of two of the 2835 // following forms interleaved: 2836 // <x, x+1, x+2, ...> 2837 // <n+x, n+x+1, n+x+2, ...> 2838 // where n is the number of elements in the vector and x is half n. 2839 // For example: 2840 // <x, x, x+1, x+1, x+2, x+2, ...> 2841 // <x, n+x, x+1, n+x+1, x+2, n+x+2, ...> 2842 // 2843 // When undef's appear in the mask they are treated as if they were whatever 2844 // value is necessary in order to fit the above forms. 
2845 static SDValue lowerVECTOR_SHUFFLE_ILVL(SDValue Op, EVT ResTy, 2846 SmallVector<int, 16> Indices, 2847 SelectionDAG &DAG) { 2848 assert((Indices.size() % 2) == 0); 2849 2850 unsigned HalfSize = Indices.size() / 2; 2851 SDValue Wt; 2852 SDValue Ws; 2853 const auto &Begin = Indices.begin(); 2854 const auto &End = Indices.end(); 2855 2856 // Check even elements are taken from the left (highest-indexed) elements of 2857 // one half or the other and pick an operand accordingly. 2858 if (fitsRegularPattern<int>(Begin, 2, End, HalfSize, 1)) 2859 Wt = Op->getOperand(0); 2860 else if (fitsRegularPattern<int>(Begin, 2, End, Indices.size() + HalfSize, 1)) 2861 Wt = Op->getOperand(1); 2862 else 2863 return SDValue(); 2864 2865 // Check odd elements are taken from the left (highest-indexed) elements of 2866 // one half or the other and pick an operand accordingly. 2867 if (fitsRegularPattern<int>(Begin + 1, 2, End, HalfSize, 1)) 2868 Ws = Op->getOperand(0); 2869 else if (fitsRegularPattern<int>(Begin + 1, 2, End, Indices.size() + HalfSize, 2870 1)) 2871 Ws = Op->getOperand(1); 2872 else 2873 return SDValue(); 2874 2875 return DAG.getNode(MipsISD::ILVL, SDLoc(Op), ResTy, Ws, Wt); 2876 } 2877 2878 // Lower VECTOR_SHUFFLE into PCKEV (if possible). 2879 // 2880 // PCKEV copies the even elements of each vector into the result vector. 2881 // 2882 // It is possible to lower into PCKEV when the mask consists of two of the 2883 // following forms concatenated: 2884 // <0, 2, 4, ...> 2885 // <n, n+2, n+4, ...> 2886 // where n is the number of elements in the vector. 2887 // For example: 2888 // <0, 2, 4, ..., 0, 2, 4, ...> 2889 // <0, 2, 4, ..., n, n+2, n+4, ...> 2890 // 2891 // When undef's appear in the mask they are treated as if they were whatever 2892 // value is necessary in order to fit the above forms. 
2893 static SDValue lowerVECTOR_SHUFFLE_PCKEV(SDValue Op, EVT ResTy, 2894 SmallVector<int, 16> Indices, 2895 SelectionDAG &DAG) { 2896 assert((Indices.size() % 2) == 0); 2897 2898 SDValue Wt; 2899 SDValue Ws; 2900 const auto &Begin = Indices.begin(); 2901 const auto &Mid = Indices.begin() + Indices.size() / 2; 2902 const auto &End = Indices.end(); 2903 2904 if (fitsRegularPattern<int>(Begin, 1, Mid, 0, 2)) 2905 Wt = Op->getOperand(0); 2906 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size(), 2)) 2907 Wt = Op->getOperand(1); 2908 else 2909 return SDValue(); 2910 2911 if (fitsRegularPattern<int>(Mid, 1, End, 0, 2)) 2912 Ws = Op->getOperand(0); 2913 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size(), 2)) 2914 Ws = Op->getOperand(1); 2915 else 2916 return SDValue(); 2917 2918 return DAG.getNode(MipsISD::PCKEV, SDLoc(Op), ResTy, Ws, Wt); 2919 } 2920 2921 // Lower VECTOR_SHUFFLE into PCKOD (if possible). 2922 // 2923 // PCKOD copies the odd elements of each vector into the result vector. 2924 // 2925 // It is possible to lower into PCKOD when the mask consists of two of the 2926 // following forms concatenated: 2927 // <1, 3, 5, ...> 2928 // <n+1, n+3, n+5, ...> 2929 // where n is the number of elements in the vector. 2930 // For example: 2931 // <1, 3, 5, ..., 1, 3, 5, ...> 2932 // <1, 3, 5, ..., n+1, n+3, n+5, ...> 2933 // 2934 // When undef's appear in the mask they are treated as if they were whatever 2935 // value is necessary in order to fit the above forms. 
2936 static SDValue lowerVECTOR_SHUFFLE_PCKOD(SDValue Op, EVT ResTy, 2937 SmallVector<int, 16> Indices, 2938 SelectionDAG &DAG) { 2939 assert((Indices.size() % 2) == 0); 2940 2941 SDValue Wt; 2942 SDValue Ws; 2943 const auto &Begin = Indices.begin(); 2944 const auto &Mid = Indices.begin() + Indices.size() / 2; 2945 const auto &End = Indices.end(); 2946 2947 if (fitsRegularPattern<int>(Begin, 1, Mid, 1, 2)) 2948 Wt = Op->getOperand(0); 2949 else if (fitsRegularPattern<int>(Begin, 1, Mid, Indices.size() + 1, 2)) 2950 Wt = Op->getOperand(1); 2951 else 2952 return SDValue(); 2953 2954 if (fitsRegularPattern<int>(Mid, 1, End, 1, 2)) 2955 Ws = Op->getOperand(0); 2956 else if (fitsRegularPattern<int>(Mid, 1, End, Indices.size() + 1, 2)) 2957 Ws = Op->getOperand(1); 2958 else 2959 return SDValue(); 2960 2961 return DAG.getNode(MipsISD::PCKOD, SDLoc(Op), ResTy, Ws, Wt); 2962 } 2963 2964 // Lower VECTOR_SHUFFLE into VSHF. 2965 // 2966 // This mostly consists of converting the shuffle indices in Indices into a 2967 // BUILD_VECTOR and adding it as an operand to the resulting VSHF. There is 2968 // also code to eliminate unused operands of the VECTOR_SHUFFLE. For example, 2969 // if the type is v8i16 and all the indices are less than 8 then the second 2970 // operand is unused and can be replaced with anything. We choose to replace it 2971 // with the used operand since this reduces the number of instructions overall. 2972 // 2973 // NOTE: SPLATI shuffle masks may contain UNDEFs, since isSPLATI() treats 2974 // UNDEFs as same as SPLATI index. 2975 // For other instances we use the last valid index if UNDEF is 2976 // encountered. 
static SDValue lowerVECTOR_SHUFFLE_VSHF(SDValue Op, EVT ResTy,
                                        const SmallVector<int, 16> &Indices,
                                        const bool isSPLATI,
                                        SelectionDAG &DAG) {
  SmallVector<SDValue, 16> Ops;
  SDValue Op0;
  SDValue Op1;
  // The mask operand is an integer vector with the same lane layout as ResTy.
  EVT MaskVecTy = ResTy.changeVectorElementTypeToInteger();
  EVT MaskEltTy = MaskVecTy.getVectorElementType();
  bool Using1stVec = false;
  bool Using2ndVec = false;
  SDLoc DL(Op);
  int ResTyNumElts = ResTy.getVectorNumElements();

  assert(Indices[0] >= 0 &&
         "shuffle mask starts with an UNDEF, which is not expected");

  // Work out which of the two shuffle operands the mask actually reads.
  for (int i = 0; i < ResTyNumElts; ++i) {
    // Idx == -1 means UNDEF
    int Idx = Indices[i];

    if (0 <= Idx && Idx < ResTyNumElts)
      Using1stVec = true;
    if (ResTyNumElts <= Idx && Idx < ResTyNumElts * 2)
      Using2ndVec = true;
  }
  // Build the mask constants, giving every UNDEF lane a concrete index.
  int LastValidIndex = 0;
  for (size_t i = 0; i < Indices.size(); i++) {
    int Idx = Indices[i];
    if (Idx < 0) {
      // Continue using splati index or use the last valid index.
      Idx = isSPLATI ? Indices[0] : LastValidIndex;
    } else {
      LastValidIndex = Idx;
    }
    Ops.push_back(DAG.getTargetConstant(Idx, DL, MaskEltTy));
  }

  SDValue MaskVec = DAG.getBuildVector(MaskVecTy, DL, Ops);

  // When only one operand is referenced, pass it for both vector inputs.
  if (Using1stVec && Using2ndVec) {
    Op0 = Op->getOperand(0);
    Op1 = Op->getOperand(1);
  } else if (Using1stVec)
    Op0 = Op1 = Op->getOperand(0);
  else if (Using2ndVec)
    Op0 = Op1 = Op->getOperand(1);
  else
    llvm_unreachable("shuffle vector mask references neither vector operand?");

  // VECTOR_SHUFFLE concatenates the vectors in a vectorwise fashion.
  // <0b00, 0b01> + <0b10, 0b11> -> <0b00, 0b01, 0b10, 0b11>
  // VSHF concatenates the vectors in a bitwise fashion:
  // <0b00, 0b01> + <0b10, 0b11> ->
  // 0b0100       + 0b1110       -> 0b01001110
  //                                <0b10, 0b11, 0b00, 0b01>
  // We must therefore swap the operands to get the correct result.
  return DAG.getNode(MipsISD::VSHF, DL, ResTy, MaskVec, Op1, Op0);
}

// Lower VECTOR_SHUFFLE into one of a number of instructions depending on the
// indices in the shuffle.
//
// Only 128-bit result types are handled; for anything else a null SDValue is
// returned. The candidate lowerings are tried in a fixed order (SPLATI, ILVEV,
// ILVOD, ILVL, ILVR, PCKEV, PCKOD, SHF) and the first one that accepts the
// mask wins; VSHF is the fallback when none of them does.
SDValue MipsSETargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  ShuffleVectorSDNode *Node = cast<ShuffleVectorSDNode>(Op);
  EVT ResTy = Op->getValueType(0);

  if (!ResTy.is128BitVector())
    return SDValue();

  int ResTyNumElts = ResTy.getVectorNumElements();
  SmallVector<int, 16> Indices;

  // Copy the shuffle mask into a plain index list (-1 denotes an undef lane).
  for (int i = 0; i < ResTyNumElts; ++i)
    Indices.push_back(Node->getMaskElt(i));

  // splati.[bhwd] is preferable to the others but is matched from
  // MipsISD::VSHF.
  if (isVECTOR_SHUFFLE_SPLATI(Op, ResTy, Indices, DAG))
    return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, true, DAG);
  SDValue Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVL(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_ILVR(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKEV(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_PCKOD(Op, ResTy, Indices, DAG)))
    return Result;
  if ((Result = lowerVECTOR_SHUFFLE_SHF(Op, ResTy, Indices, DAG)))
    return Result;
  return lowerVECTOR_SHUFFLE_VSHF(Op, ResTy, Indices, false, DAG);
}

// Expand the bposge32 pseudo into a branch diamond that produces 0 or 1 in
// the pseudo's result register (operand 0).
//
// Three new blocks are inserted after BB. The instructions that followed MI,
// together with BB's successor edges, move into the last of them ($sink),
// which is also the block returned to the caller.
MachineBasicBlock *
MipsSETargetLowering::emitBPOSGE32(MachineInstr &MI,
                                   MachineBasicBlock *BB) const {
  // $bb:
  //  bposge32_pseudo $vr0
  //  =>
  // $bb:
  //  bposge32 $tbb
  // $fbb:
  //  li $vr2, 0
  //  b $sink
  // $tbb:
  //  li $vr1, 1
  // $sink:
  //  $vr0 = phi($vr2, $fbb, $vr1, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  // Layout order after BB is FBB, TBB, Sink.
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // NOTE(review): both BPOSGE32 and BPOSGE32C_MMR3 are appended to $BB
  // unconditionally here, whereas the diagram above shows a single branch.
  // Confirm whether one of them should be selected based on the subtarget.
  // Insert the real bposge32 instruction to $BB.
  BuildMI(BB, DL, TII->get(Mips::BPOSGE32)).addMBB(TBB);
  // Insert the real bposge32c instruction to $BB.
  BuildMI(BB, DL, TII->get(Mips::BPOSGE32C_MMR3)).addMBB(TBB);

  // Fill $FBB.
  Register VR2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), VR2)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register VR1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), VR1)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(VR2)
      .addMBB(FBB)
      .addReg(VR1)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}

// Expand an MSA vector-condition pseudo into a branch diamond that produces
// 0 or 1 in the pseudo's result register (operand 0).
//
// BranchOp is the opcode of the real conditional branch to emit; it is given
// the vector register from operand 1 of the pseudo. The block layout and the
// returned block ($sink) are the same as for emitBPOSGE32.
MachineBasicBlock *MipsSETargetLowering::emitMSACBranchPseudo(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned BranchOp) const {
  // $bb:
  //  vany_nonzero $rd, $ws
  //  =>
  // $bb:
  //  bnz.b $ws, $tbb
  //  b $fbb
  // $fbb:
  //  li $rd1, 0
  //  b $sink
  // $tbb:
  //  li $rd2, 1
  // $sink:
  //  $rd = phi($rd1, $fbb, $rd2, $tbb)

  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterClass *RC = &Mips::GPR32RegClass;
  DebugLoc DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = std::next(MachineFunction::iterator(BB));
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *FBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *TBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Sink = F->CreateMachineBasicBlock(LLVM_BB);
  // Layout order after BB is FBB, TBB, Sink.
  F->insert(It, FBB);
  F->insert(It, TBB);
  F->insert(It, Sink);

  // Transfer the remainder of BB and its successor edges to Sink.
  Sink->splice(Sink->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
               BB->end());
  Sink->transferSuccessorsAndUpdatePHIs(BB);

  // Add successors.
  BB->addSuccessor(FBB);
  BB->addSuccessor(TBB);
  FBB->addSuccessor(Sink);
  TBB->addSuccessor(Sink);

  // Insert the real bnz.b instruction to $BB.
  BuildMI(BB, DL, TII->get(BranchOp))
      .addReg(MI.getOperand(1).getReg())
      .addMBB(TBB);

  // Fill $FBB.
  Register RD1 = RegInfo.createVirtualRegister(RC);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::ADDiu), RD1)
    .addReg(Mips::ZERO).addImm(0);
  BuildMI(*FBB, FBB->end(), DL, TII->get(Mips::B)).addMBB(Sink);

  // Fill $TBB.
  Register RD2 = RegInfo.createVirtualRegister(RC);
  BuildMI(*TBB, TBB->end(), DL, TII->get(Mips::ADDiu), RD2)
    .addReg(Mips::ZERO).addImm(1);

  // Insert phi function to $Sink.
  BuildMI(*Sink, Sink->begin(), DL, TII->get(Mips::PHI),
          MI.getOperand(0).getReg())
      .addReg(RD1)
      .addMBB(FBB)
      .addReg(RD2)
      .addMBB(TBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return Sink;
}

// Emit the COPY_FW pseudo instruction.
//
// copy_fw_pseudo $fd, $ws, n
// =>
// splati.w $wt, $ws, $n
// copy     $fd, $wt:sub_lo
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is never valid
// for lane 1 because it would require FR=0 mode which isn't supported by MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  unsigned Lane = MI.getOperand(2).getImm();

  if (Lane == 0) {
    // Lane 0 is read directly through the sub_lo sub-register of the source.
    unsigned Wt = Ws;
    if (!Subtarget.useOddSPReg()) {
      // We must copy to an even-numbered MSA register so that the
      // single-precision sub-register is also guaranteed to be even-numbered.
      Wt = RegInfo.createVirtualRegister(&Mips::MSA128WEvensRegClass);

      BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Wt).addReg(Ws);
    }

    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  } else {
    // Other lanes: splat the requested lane across a temporary, then read
    // the temporary's sub_lo sub-register.
    Register Wt = RegInfo.createVirtualRegister(
        Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                                : &Mips::MSA128WEvensRegClass);

    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wt).addReg(Ws).addImm(Lane);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_lo);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the COPY_FD pseudo instruction.
//
// copy_fd_pseudo $fd, $ws, n
// =>
// splati.d $wt, $ws, $n
// copy $fd, $wt:sub_64
//
// When n is zero, the equivalent operation can be performed with (potentially)
// zero instructions due to register overlaps. This optimization is always
// valid because FR=1 mode which is the only supported mode in MSA.
MachineBasicBlock *
MipsSETargetLowering::emitCOPY_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  Register Fd = MI.getOperand(0).getReg();
  Register Ws = MI.getOperand(1).getReg();
  // Lane is only compared against zero below.
  unsigned Lane = MI.getOperand(2).getImm() * 2;
  DebugLoc DL = MI.getDebugLoc();

  if (Lane == 0)
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Ws, 0, Mips::sub_64);
  else {
    Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

    // NOTE(review): the splat index is hard-coded to 1 rather than derived
    // from the operand; presumably fine because a 128-bit vector of doubles
    // has only lanes 0 and 1 - confirm.
    BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wt).addReg(Ws).addImm(1);
    BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Fd).addReg(Wt, 0, Mips::sub_64);
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the INSERT_FW pseudo instruction.
3293 // 3294 // insert_fw_pseudo $wd, $wd_in, $n, $fs 3295 // => 3296 // subreg_to_reg $wt:sub_lo, $fs 3297 // insve_w $wd[$n], $wd_in, $wt[0] 3298 MachineBasicBlock * 3299 MipsSETargetLowering::emitINSERT_FW(MachineInstr &MI, 3300 MachineBasicBlock *BB) const { 3301 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3302 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3303 DebugLoc DL = MI.getDebugLoc(); 3304 Register Wd = MI.getOperand(0).getReg(); 3305 Register Wd_in = MI.getOperand(1).getReg(); 3306 unsigned Lane = MI.getOperand(2).getImm(); 3307 Register Fs = MI.getOperand(3).getReg(); 3308 Register Wt = RegInfo.createVirtualRegister( 3309 Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass 3310 : &Mips::MSA128WEvensRegClass); 3311 3312 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3313 .addImm(0) 3314 .addReg(Fs) 3315 .addImm(Mips::sub_lo); 3316 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_W), Wd) 3317 .addReg(Wd_in) 3318 .addImm(Lane) 3319 .addReg(Wt) 3320 .addImm(0); 3321 3322 MI.eraseFromParent(); // The pseudo instruction is gone now. 3323 return BB; 3324 } 3325 3326 // Emit the INSERT_FD pseudo instruction. 
3327 // 3328 // insert_fd_pseudo $wd, $fs, n 3329 // => 3330 // subreg_to_reg $wt:sub_64, $fs 3331 // insve_d $wd[$n], $wd_in, $wt[0] 3332 MachineBasicBlock * 3333 MipsSETargetLowering::emitINSERT_FD(MachineInstr &MI, 3334 MachineBasicBlock *BB) const { 3335 assert(Subtarget.isFP64bit()); 3336 3337 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3338 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3339 DebugLoc DL = MI.getDebugLoc(); 3340 Register Wd = MI.getOperand(0).getReg(); 3341 Register Wd_in = MI.getOperand(1).getReg(); 3342 unsigned Lane = MI.getOperand(2).getImm(); 3343 Register Fs = MI.getOperand(3).getReg(); 3344 Register Wt = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3345 3346 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt) 3347 .addImm(0) 3348 .addReg(Fs) 3349 .addImm(Mips::sub_64); 3350 BuildMI(*BB, MI, DL, TII->get(Mips::INSVE_D), Wd) 3351 .addReg(Wd_in) 3352 .addImm(Lane) 3353 .addReg(Wt) 3354 .addImm(0); 3355 3356 MI.eraseFromParent(); // The pseudo instruction is gone now. 3357 return BB; 3358 } 3359 3360 // Emit the INSERT_([BHWD]|F[WD])_VIDX pseudo instruction. 
//
// For integer:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $rs)
// =>
// (SLL $lanetmp1, $lane, <log2size)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSERT_[BHWD], $wdtmp2, $wdtmp1, 0, $rs)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2,  $lanetmp2)
//
// For floating point:
// (INSERT_([BHWD]|F[WD])_PSEUDO $wd, $wd_in, $n, $fs)
// =>
// (SUBREG_TO_REG $wt, $fs, <subreg>)
// (SLL $lanetmp1, $lane, <log2size)
// (SLD_B $wdtmp1, $wd_in, $wd_in, $lanetmp1)
// (INSVE_[WD], $wdtmp2, 0, $wdtmp1, 0)
// (NEG $lanetmp2, $lanetmp1)
// (SLD_B $wd, $wdtmp2, $wdtmp2,  $lanetmp2)
//
// EltSizeInBytes must be 1, 2, 4 or 8. IsFP selects the floating-point
// expansion. Unlike the fixed-lane inserts, the lane number (operand 2) is
// held in a register.
MachineBasicBlock *MipsSETargetLowering::emitINSERT_DF_VIDX(
    MachineInstr &MI, MachineBasicBlock *BB, unsigned EltSizeInBytes,
    bool IsFP) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register SrcVecReg = MI.getOperand(1).getReg();
  Register LaneReg = MI.getOperand(2).getReg();
  Register SrcValReg = MI.getOperand(3).getReg();

  const TargetRegisterClass *VecRC = nullptr;
  // On N64 the lane arithmetic uses 64-bit registers and opcodes, and the
  // sld.b operands read the sub_32 sub-register of the result.
  // FIXME: This should be true for N32 too.
  const TargetRegisterClass *GPRRC =
      Subtarget.isABI_N64() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
  unsigned SubRegIdx = Subtarget.isABI_N64() ? Mips::sub_32 : 0;
  unsigned ShiftOp = Subtarget.isABI_N64() ? Mips::DSLL : Mips::SLL;
  unsigned EltLog2Size;
  unsigned InsertOp = 0;
  unsigned InsveOp = 0;
  // Pick the opcodes and vector register class matching the element size.
  switch (EltSizeInBytes) {
  default:
    llvm_unreachable("Unexpected size");
  case 1:
    EltLog2Size = 0;
    InsertOp = Mips::INSERT_B;
    InsveOp = Mips::INSVE_B;
    VecRC = &Mips::MSA128BRegClass;
    break;
  case 2:
    EltLog2Size = 1;
    InsertOp = Mips::INSERT_H;
    InsveOp = Mips::INSVE_H;
    VecRC = &Mips::MSA128HRegClass;
    break;
  case 4:
    EltLog2Size = 2;
    InsertOp = Mips::INSERT_W;
    InsveOp = Mips::INSVE_W;
    VecRC = &Mips::MSA128WRegClass;
    break;
  case 8:
    EltLog2Size = 3;
    InsertOp = Mips::INSERT_D;
    InsveOp = Mips::INSVE_D;
    VecRC = &Mips::MSA128DRegClass;
    break;
  }

  // A floating-point source is first placed in a vector register so that
  // insve.df can read it as element zero.
  if (IsFP) {
    Register Wt = RegInfo.createVirtualRegister(VecRC);
    BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Wt)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(EltSizeInBytes == 8 ? Mips::sub_64 : Mips::sub_lo);
    SrcValReg = Wt;
  }

  // Convert the lane index into a byte index
  if (EltSizeInBytes != 1) {
    Register LaneTmp1 = RegInfo.createVirtualRegister(GPRRC);
    BuildMI(*BB, MI, DL, TII->get(ShiftOp), LaneTmp1)
        .addReg(LaneReg)
        .addImm(EltLog2Size);
    LaneReg = LaneTmp1;
  }

  // Rotate bytes around so that the desired lane is element zero
  Register WdTmp1 = RegInfo.createVirtualRegister(VecRC);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), WdTmp1)
      .addReg(SrcVecReg)
      .addReg(SrcVecReg)
      .addReg(LaneReg, 0, SubRegIdx);

  Register WdTmp2 = RegInfo.createVirtualRegister(VecRC);
  if (IsFP) {
    // Use insve.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsveOp), WdTmp2)
        .addReg(WdTmp1)
        .addImm(0)
        .addReg(SrcValReg)
        .addImm(0);
  } else {
    // Use insert.df to insert to element zero
    BuildMI(*BB, MI, DL, TII->get(InsertOp), WdTmp2)
        .addReg(WdTmp1)
        .addReg(SrcValReg)
        .addImm(0);
  }

  // Rotate elements the rest of the way for a full rotation.
  // sld.df interprets $rt modulo the number of columns so we only need to
  // negate the lane index to do this.
  Register LaneTmp2 = RegInfo.createVirtualRegister(GPRRC);
  BuildMI(*BB, MI, DL, TII->get(Subtarget.isABI_N64() ? Mips::DSUB : Mips::SUB),
          LaneTmp2)
      .addReg(Subtarget.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO)
      .addReg(LaneReg);
  BuildMI(*BB, MI, DL, TII->get(Mips::SLD_B), Wd)
      .addReg(WdTmp2)
      .addReg(WdTmp2)
      .addReg(LaneTmp2, 0, SubRegIdx);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FW pseudo instruction.
//
// fill_fw_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_lo, $wt1, $fs
// splati.w $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FW(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  // Both temporaries are restricted to the even-register class when odd
  // single-precision registers are not in use.
  Register Wt1 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(
      Subtarget.useOddSPReg() ? &Mips::MSA128WRegClass
                              : &Mips::MSA128WEvensRegClass);

  // Put the scalar into the sub_lo sub-register of an otherwise undefined
  // vector, then splat element 0 across the result.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_lo);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_W), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the FILL_FD pseudo instruction.
//
// fill_fd_pseudo $wd, $fs
// =>
// implicit_def $wt1
// insert_subreg $wt2:subreg_64, $wt1, $fs
// splati.d $wd, $wt2[0]
MachineBasicBlock *
MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
                                  MachineBasicBlock *BB) const {
  assert(Subtarget.isFP64bit());

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  Register Wd = MI.getOperand(0).getReg();
  Register Fs = MI.getOperand(1).getReg();
  Register Wt1 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
  Register Wt2 = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);

  // Put the scalar into the sub_64 sub-register of an otherwise undefined
  // vector, then splat element 0 across the result.
  BuildMI(*BB, MI, DL, TII->get(Mips::IMPLICIT_DEF), Wt1);
  BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_SUBREG), Wt2)
      .addReg(Wt1)
      .addReg(Fs)
      .addImm(Mips::sub_64);
  BuildMI(*BB, MI, DL, TII->get(Mips::SPLATI_D), Wd).addReg(Wt2).addImm(0);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

// Emit the ST_F16_PSEUDO instruction to store a f16 value from an MSA
// register.
//
// STF16 MSA128F16:$wd, mem_simm10:$addr
// =>
//  copy_u.h $rtemp,$wd[0]
//  sh $rtemp, $addr
//
// Safety: We can't use st.h & co as they would over write the memory after
// the destination. It would require half floats be allocated 16 bytes(!) of
// space.
3562 MachineBasicBlock * 3563 MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI, 3564 MachineBasicBlock *BB) const { 3565 3566 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3567 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3568 DebugLoc DL = MI.getDebugLoc(); 3569 Register Ws = MI.getOperand(0).getReg(); 3570 Register Rt = MI.getOperand(1).getReg(); 3571 const MachineMemOperand &MMO = **MI.memoperands_begin(); 3572 unsigned Imm = MMO.getOffset(); 3573 3574 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3575 // spill and reload can expand as a GPR64 operand. Examine the 3576 // operand in detail and default to ABI. 3577 const TargetRegisterClass *RC = 3578 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3579 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3580 : &Mips::GPR64RegClass); 3581 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3582 Register Rs = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); 3583 3584 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0); 3585 if(!UsingMips32) { 3586 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR64RegClass); 3587 BuildMI(*BB, MI, DL, TII->get(Mips::SUBREG_TO_REG), Tmp) 3588 .addImm(0) 3589 .addReg(Rs) 3590 .addImm(Mips::sub_32); 3591 Rs = Tmp; 3592 } 3593 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64)) 3594 .addReg(Rs) 3595 .addReg(Rt) 3596 .addImm(Imm) 3597 .addMemOperand(BB->getParent()->getMachineMemOperand( 3598 &MMO, MMO.getOffset(), MMO.getSize())); 3599 3600 MI.eraseFromParent(); 3601 return BB; 3602 } 3603 3604 // Emit the LD_F16_PSEDUO instruction to load a f16 value into an MSA register. 3605 // 3606 // LD_F16 MSA128F16:$wd, mem_simm10:$addr 3607 // => 3608 // lh $rtemp, $addr 3609 // fill.h $wd, $rtemp 3610 // 3611 // Safety: We can't use ld.h & co as they over-read from the source. 
3612 // Additionally, if the address is not modulo 16, 2 cases can occur: 3613 // a) Segmentation fault as the load instruction reads from a memory page 3614 // memory it's not supposed to. 3615 // b) The load crosses an implementation specific boundary, requiring OS 3616 // intervention. 3617 MachineBasicBlock * 3618 MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI, 3619 MachineBasicBlock *BB) const { 3620 3621 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3622 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3623 DebugLoc DL = MI.getDebugLoc(); 3624 Register Wd = MI.getOperand(0).getReg(); 3625 3626 // Caution: A load via the GOT can expand to a GPR32 operand, a load via 3627 // spill and reload can expand as a GPR64 operand. Examine the 3628 // operand in detail and default to ABI. 3629 const TargetRegisterClass *RC = 3630 MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg()) 3631 : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass 3632 : &Mips::GPR64RegClass); 3633 3634 const bool UsingMips32 = RC == &Mips::GPR32RegClass; 3635 Register Rt = RegInfo.createVirtualRegister(RC); 3636 3637 MachineInstrBuilder MIB = 3638 BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt); 3639 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 3640 MIB.add(MO); 3641 3642 if(!UsingMips32) { 3643 Register Tmp = RegInfo.createVirtualRegister(&Mips::GPR32RegClass); 3644 BuildMI(*BB, MI, DL, TII->get(Mips::COPY), Tmp).addReg(Rt, 0, Mips::sub_32); 3645 Rt = Tmp; 3646 } 3647 3648 BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt); 3649 3650 MI.eraseFromParent(); 3651 return BB; 3652 } 3653 3654 // Emit the FPROUND_PSEUDO instruction. 3655 // 3656 // Round an FGR64Opnd, FGR32Opnd to an f16. 3657 // 3658 // Safety: Cycle the operand through the GPRs so the result always ends up 3659 // the correct MSA register. 3660 // 3661 // FIXME: This copying is strictly unnecessary. 
If we could tie FGR32Opnd:$Fs 3662 // / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register 3663 // (which they can be, as the MSA registers are defined to alias the 3664 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3665 // the correct register class. That requires operands be tie-able across 3666 // register classes which have a sub/super register class relationship. 3667 // 3668 // For FPG32Opnd: 3669 // 3670 // FPROUND MSA128F16:$wd, FGR32Opnd:$fs 3671 // => 3672 // mfc1 $rtemp, $fs 3673 // fill.w $rtemp, $wtemp 3674 // fexdo.w $wd, $wtemp, $wtemp 3675 // 3676 // For FPG64Opnd on mips32r2+: 3677 // 3678 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3679 // => 3680 // mfc1 $rtemp, $fs 3681 // fill.w $rtemp, $wtemp 3682 // mfhc1 $rtemp2, $fs 3683 // insert.w $wtemp[1], $rtemp2 3684 // insert.w $wtemp[3], $rtemp2 3685 // fexdo.w $wtemp2, $wtemp, $wtemp 3686 // fexdo.h $wd, $temp2, $temp2 3687 // 3688 // For FGR64Opnd on mips64r2+: 3689 // 3690 // FPROUND MSA128F16:$wd, FGR64Opnd:$fs 3691 // => 3692 // dmfc1 $rtemp, $fs 3693 // fill.d $rtemp, $wtemp 3694 // fexdo.w $wtemp2, $wtemp, $wtemp 3695 // fexdo.h $wd, $wtemp2, $wtemp2 3696 // 3697 // Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the 3698 // undef bits are "just right" and the exception enable bits are 3699 // set. By using fill.w to replicate $fs into all elements over 3700 // insert.w for one element, we avoid that potiential case. If 3701 // fexdo.[hw] causes an exception in, the exception is valid and it 3702 // occurs for all elements. 3703 MachineBasicBlock * 3704 MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI, 3705 MachineBasicBlock *BB, 3706 bool IsFGR64) const { 3707 3708 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3709 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3710 // it. 
3711 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3712 3713 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3714 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3715 3716 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3717 DebugLoc DL = MI.getDebugLoc(); 3718 Register Wd = MI.getOperand(0).getReg(); 3719 Register Fs = MI.getOperand(1).getReg(); 3720 3721 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3722 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3723 const TargetRegisterClass *GPRRC = 3724 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3725 unsigned MFC1Opc = IsFGR64onMips64 3726 ? Mips::DMFC1 3727 : (IsFGR64onMips32 ? Mips::MFC1_D64 : Mips::MFC1); 3728 unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W; 3729 3730 // Perform the register class copy as mentioned above. 3731 Register Rtemp = RegInfo.createVirtualRegister(GPRRC); 3732 BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs); 3733 BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp); 3734 unsigned WPHI = Wtemp; 3735 3736 if (IsFGR64onMips32) { 3737 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3738 BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs); 3739 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3740 Register Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3741 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2) 3742 .addReg(Wtemp) 3743 .addReg(Rtemp2) 3744 .addImm(1); 3745 BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3) 3746 .addReg(Wtemp2) 3747 .addReg(Rtemp2) 3748 .addImm(3); 3749 WPHI = Wtemp3; 3750 } 3751 3752 if (IsFGR64) { 3753 Register Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3754 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2) 3755 .addReg(WPHI) 3756 .addReg(WPHI); 3757 WPHI = Wtemp2; 3758 } 3759 3760 BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), 
Wd).addReg(WPHI).addReg(WPHI); 3761 3762 MI.eraseFromParent(); 3763 return BB; 3764 } 3765 3766 // Emit the FPEXTEND_PSEUDO instruction. 3767 // 3768 // Expand an f16 to either a FGR32Opnd or FGR64Opnd. 3769 // 3770 // Safety: Cycle the result through the GPRs so the result always ends up 3771 // the correct floating point register. 3772 // 3773 // FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd 3774 // / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register 3775 // (which they can be, as the MSA registers are defined to alias the 3776 // FPU's 64 bit and 32 bit registers) the result can be accessed using 3777 // the correct register class. That requires operands be tie-able across 3778 // register classes which have a sub/super register class relationship. I 3779 // haven't checked. 3780 // 3781 // For FGR32Opnd: 3782 // 3783 // FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws 3784 // => 3785 // fexupr.w $wtemp, $ws 3786 // copy_s.w $rtemp, $ws[0] 3787 // mtc1 $rtemp, $fd 3788 // 3789 // For FGR64Opnd on Mips64: 3790 // 3791 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3792 // => 3793 // fexupr.w $wtemp, $ws 3794 // fexupr.d $wtemp2, $wtemp 3795 // copy_s.d $rtemp, $wtemp2s[0] 3796 // dmtc1 $rtemp, $fd 3797 // 3798 // For FGR64Opnd on Mips32: 3799 // 3800 // FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws 3801 // => 3802 // fexupr.w $wtemp, $ws 3803 // fexupr.d $wtemp2, $wtemp 3804 // copy_s.w $rtemp, $wtemp2[0] 3805 // mtc1 $rtemp, $ftemp 3806 // copy_s.w $rtemp2, $wtemp2[1] 3807 // $fd = mthc1 $rtemp2, $ftemp 3808 MachineBasicBlock * 3809 MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI, 3810 MachineBasicBlock *BB, 3811 bool IsFGR64) const { 3812 3813 // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous 3814 // here. It's technically doable to support MIPS32 here, but the ISA forbids 3815 // it. 
3816 assert(Subtarget.hasMSA() && Subtarget.hasMips32r2()); 3817 3818 bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64; 3819 bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64; 3820 3821 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3822 DebugLoc DL = MI.getDebugLoc(); 3823 Register Fd = MI.getOperand(0).getReg(); 3824 Register Ws = MI.getOperand(1).getReg(); 3825 3826 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3827 const TargetRegisterClass *GPRRC = 3828 IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; 3829 unsigned MTC1Opc = IsFGR64onMips64 3830 ? Mips::DMTC1 3831 : (IsFGR64onMips32 ? Mips::MTC1_D64 : Mips::MTC1); 3832 Register COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W; 3833 3834 Register Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass); 3835 Register WPHI = Wtemp; 3836 3837 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws); 3838 if (IsFGR64) { 3839 WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass); 3840 BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp); 3841 } 3842 3843 // Perform the safety regclass copy mentioned above. 3844 Register Rtemp = RegInfo.createVirtualRegister(GPRRC); 3845 Register FPRPHI = IsFGR64onMips32 3846 ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass) 3847 : Fd; 3848 BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0); 3849 BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp); 3850 3851 if (IsFGR64onMips32) { 3852 Register Rtemp2 = RegInfo.createVirtualRegister(GPRRC); 3853 BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2) 3854 .addReg(WPHI) 3855 .addImm(1); 3856 BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd) 3857 .addReg(FPRPHI) 3858 .addReg(Rtemp2); 3859 } 3860 3861 MI.eraseFromParent(); 3862 return BB; 3863 } 3864 3865 // Emit the FEXP2_W_1 pseudo instructions. 
3866 // 3867 // fexp2_w_1_pseudo $wd, $wt 3868 // => 3869 // ldi.w $ws, 1 3870 // fexp2.w $wd, $ws, $wt 3871 MachineBasicBlock * 3872 MipsSETargetLowering::emitFEXP2_W_1(MachineInstr &MI, 3873 MachineBasicBlock *BB) const { 3874 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3875 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3876 const TargetRegisterClass *RC = &Mips::MSA128WRegClass; 3877 Register Ws1 = RegInfo.createVirtualRegister(RC); 3878 Register Ws2 = RegInfo.createVirtualRegister(RC); 3879 DebugLoc DL = MI.getDebugLoc(); 3880 3881 // Splat 1.0 into a vector 3882 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_W), Ws1).addImm(1); 3883 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_W), Ws2).addReg(Ws1); 3884 3885 // Emit 1.0 * fexp2(Wt) 3886 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_W), MI.getOperand(0).getReg()) 3887 .addReg(Ws2) 3888 .addReg(MI.getOperand(1).getReg()); 3889 3890 MI.eraseFromParent(); // The pseudo instruction is gone now. 3891 return BB; 3892 } 3893 3894 // Emit the FEXP2_D_1 pseudo instructions. 
3895 // 3896 // fexp2_d_1_pseudo $wd, $wt 3897 // => 3898 // ldi.d $ws, 1 3899 // fexp2.d $wd, $ws, $wt 3900 MachineBasicBlock * 3901 MipsSETargetLowering::emitFEXP2_D_1(MachineInstr &MI, 3902 MachineBasicBlock *BB) const { 3903 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 3904 MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo(); 3905 const TargetRegisterClass *RC = &Mips::MSA128DRegClass; 3906 Register Ws1 = RegInfo.createVirtualRegister(RC); 3907 Register Ws2 = RegInfo.createVirtualRegister(RC); 3908 DebugLoc DL = MI.getDebugLoc(); 3909 3910 // Splat 1.0 into a vector 3911 BuildMI(*BB, MI, DL, TII->get(Mips::LDI_D), Ws1).addImm(1); 3912 BuildMI(*BB, MI, DL, TII->get(Mips::FFINT_U_D), Ws2).addReg(Ws1); 3913 3914 // Emit 1.0 * fexp2(Wt) 3915 BuildMI(*BB, MI, DL, TII->get(Mips::FEXP2_D), MI.getOperand(0).getReg()) 3916 .addReg(Ws2) 3917 .addReg(MI.getOperand(1).getReg()); 3918 3919 MI.eraseFromParent(); // The pseudo instruction is gone now. 3920 return BB; 3921 } 3922