1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SelectionDAG::LegalizeVectors method. 10 // 11 // The vector legalizer looks for vector operations which might need to be 12 // scalarized and legalizes them. This is a separate step from Legalize because 13 // scalarizing can introduce illegal types. For example, suppose we have an 14 // ISD::SDIV of type v2i64 on x86-32. The type is legal (for example, addition 15 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the 16 // operation, which introduces nodes with the illegal type i64 which must be 17 // expanded. Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC; 18 // the operation must be unrolled, which introduces nodes with the illegal 19 // type i8 which must be promoted. 20 // 21 // This does not legalize vector manipulations like ISD::BUILD_VECTOR, 22 // or operations that happen to take a vector which are custom-lowered; 23 // the legalization for such operations never produces nodes 24 // with illegal types, so it's okay to put off legalizing them until 25 // SelectionDAG::Legalize runs. 
26 // 27 //===----------------------------------------------------------------------===// 28 29 #include "llvm/ADT/DenseMap.h" 30 #include "llvm/ADT/SmallVector.h" 31 #include "llvm/Analysis/TargetLibraryInfo.h" 32 #include "llvm/Analysis/VectorUtils.h" 33 #include "llvm/CodeGen/ISDOpcodes.h" 34 #include "llvm/CodeGen/SelectionDAG.h" 35 #include "llvm/CodeGen/SelectionDAGNodes.h" 36 #include "llvm/CodeGen/TargetLowering.h" 37 #include "llvm/CodeGen/ValueTypes.h" 38 #include "llvm/CodeGenTypes/MachineValueType.h" 39 #include "llvm/IR/DataLayout.h" 40 #include "llvm/Support/Casting.h" 41 #include "llvm/Support/Compiler.h" 42 #include "llvm/Support/Debug.h" 43 #include "llvm/Support/ErrorHandling.h" 44 #include <cassert> 45 #include <cstdint> 46 #include <iterator> 47 #include <utility> 48 49 using namespace llvm; 50 51 #define DEBUG_TYPE "legalizevectorops" 52 53 namespace { 54 55 class VectorLegalizer { 56 SelectionDAG& DAG; 57 const TargetLowering &TLI; 58 bool Changed = false; // Keep track of whether anything changed 59 60 /// For nodes that are of legal width, and that have more than one use, this 61 /// map indicates what regularized operand to use. This allows us to avoid 62 /// legalizing the same thing more than once. 63 SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; 64 65 /// Adds a node to the translation cache. 66 void AddLegalizedOperand(SDValue From, SDValue To) { 67 LegalizedNodes.insert(std::make_pair(From, To)); 68 // If someone requests legalization of the new node, return itself. 69 if (From != To) 70 LegalizedNodes.insert(std::make_pair(To, To)); 71 } 72 73 /// Legalizes the given node. 74 SDValue LegalizeOp(SDValue Op); 75 76 /// Assuming the node is legal, "legalize" the results. 77 SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result); 78 79 /// Make sure Results are legal and update the translation cache. 
80 SDValue RecursivelyLegalizeResults(SDValue Op, 81 MutableArrayRef<SDValue> Results); 82 83 /// Wrapper to interface LowerOperation with a vector of Results. 84 /// Returns false if the target wants to use default expansion. Otherwise 85 /// returns true. If return is true and the Results are empty, then the 86 /// target wants to keep the input node as is. 87 bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results); 88 89 /// Implements unrolling a VSETCC. 90 SDValue UnrollVSETCC(SDNode *Node); 91 92 /// Implement expand-based legalization of vector operations. 93 /// 94 /// This is just a high-level routine to dispatch to specific code paths for 95 /// operations to legalize them. 96 void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results); 97 98 /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if 99 /// FP_TO_SINT isn't legal. 100 void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results); 101 102 /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if 103 /// SINT_TO_FLOAT and SHR on vectors isn't legal. 104 void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results); 105 106 /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. 107 SDValue ExpandSEXTINREG(SDNode *Node); 108 109 /// Implement expansion for ANY_EXTEND_VECTOR_INREG. 110 /// 111 /// Shuffles the low lanes of the operand into place and bitcasts to the proper 112 /// type. The contents of the bits in the extended part of each element are 113 /// undef. 114 SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node); 115 116 /// Implement expansion for SIGN_EXTEND_VECTOR_INREG. 117 /// 118 /// Shuffles the low lanes of the operand into place, bitcasts to the proper 119 /// type, then shifts left and arithmetic shifts right to introduce a sign 120 /// extension. 121 SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node); 122 123 /// Implement expansion for ZERO_EXTEND_VECTOR_INREG. 
124 /// 125 /// Shuffles the low lanes of the operand into place and blends zeros into 126 /// the remaining lanes, finally bitcasting to the proper type. 127 SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node); 128 129 /// Expand bswap of vectors into a shuffle if legal. 130 SDValue ExpandBSWAP(SDNode *Node); 131 132 /// Implement vselect in terms of XOR, AND, OR when blend is not 133 /// supported by the target. 134 SDValue ExpandVSELECT(SDNode *Node); 135 SDValue ExpandVP_SELECT(SDNode *Node); 136 SDValue ExpandVP_MERGE(SDNode *Node); 137 SDValue ExpandVP_REM(SDNode *Node); 138 SDValue ExpandVP_FNEG(SDNode *Node); 139 SDValue ExpandVP_FABS(SDNode *Node); 140 SDValue ExpandVP_FCOPYSIGN(SDNode *Node); 141 SDValue ExpandSELECT(SDNode *Node); 142 std::pair<SDValue, SDValue> ExpandLoad(SDNode *N); 143 SDValue ExpandStore(SDNode *N); 144 SDValue ExpandFNEG(SDNode *Node); 145 SDValue ExpandFABS(SDNode *Node); 146 SDValue ExpandFCOPYSIGN(SDNode *Node); 147 void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results); 148 void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); 149 SDValue ExpandBITREVERSE(SDNode *Node); 150 void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); 151 void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results); 152 void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results); 153 void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results); 154 void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results); 155 void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results); 156 157 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, 158 SmallVectorImpl<SDValue> &Results); 159 bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32, 160 RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, 161 RTLIB::Libcall Call_F128, 162 RTLIB::Libcall Call_PPCF128, 163 SmallVectorImpl<SDValue> &Results); 164 165 void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> 
&Results); 166 167 /// Implements vector promotion. 168 /// 169 /// This is essentially just bitcasting the operands to a different type and 170 /// bitcasting the result back to the original type. 171 void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results); 172 173 /// Implements [SU]INT_TO_FP vector promotion. 174 /// 175 /// This is a [zs]ext of the input operand to a larger integer type. 176 void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results); 177 178 /// Implements FP_TO_[SU]INT vector promotion of the result type. 179 /// 180 /// It is promoted to a larger integer type. The result is then 181 /// truncated back to the original type. 182 void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results); 183 184 /// Implements vector setcc operation promotion. 185 /// 186 /// All vector operands are promoted to a vector type with larger element 187 /// type. 188 void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results); 189 190 void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results); 191 192 public: 193 VectorLegalizer(SelectionDAG& dag) : 194 DAG(dag), TLI(dag.getTargetLoweringInfo()) {} 195 196 /// Begin legalizer the vector operations in the DAG. 197 bool Run(); 198 }; 199 200 } // end anonymous namespace 201 202 bool VectorLegalizer::Run() { 203 // Before we start legalizing vector nodes, check if there are any vectors. 204 bool HasVectors = false; 205 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 206 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) { 207 // Check if the values of the nodes contain vectors. We don't need to check 208 // the operands because we are going to check their values at some point. 209 HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); }); 210 211 // If we found a vector node we can start the legalization. 212 if (HasVectors) 213 break; 214 } 215 216 // If this basic block has no vectors then no need to legalize vectors. 
217 if (!HasVectors) 218 return false; 219 220 // The legalize process is inherently a bottom-up recursive process (users 221 // legalize their uses before themselves). Given infinite stack space, we 222 // could just start legalizing on the root and traverse the whole graph. In 223 // practice however, this causes us to run out of stack space on large basic 224 // blocks. To avoid this problem, compute an ordering of the nodes where each 225 // node is only legalized after all of its operands are legalized. 226 DAG.AssignTopologicalOrder(); 227 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 228 E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) 229 LegalizeOp(SDValue(&*I, 0)); 230 231 // Finally, it's possible the root changed. Get the new root. 232 SDValue OldRoot = DAG.getRoot(); 233 assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); 234 DAG.setRoot(LegalizedNodes[OldRoot]); 235 236 LegalizedNodes.clear(); 237 238 // Remove dead nodes now. 239 DAG.RemoveDeadNodes(); 240 241 return Changed; 242 } 243 244 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) { 245 assert(Op->getNumValues() == Result->getNumValues() && 246 "Unexpected number of results"); 247 // Generic legalization: just pass the operand through. 248 for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i) 249 AddLegalizedOperand(Op.getValue(i), SDValue(Result, i)); 250 return SDValue(Result, Op.getResNo()); 251 } 252 253 SDValue 254 VectorLegalizer::RecursivelyLegalizeResults(SDValue Op, 255 MutableArrayRef<SDValue> Results) { 256 assert(Results.size() == Op->getNumValues() && 257 "Unexpected number of results"); 258 // Make sure that the generated code is itself legal. 
259 for (unsigned i = 0, e = Results.size(); i != e; ++i) { 260 Results[i] = LegalizeOp(Results[i]); 261 AddLegalizedOperand(Op.getValue(i), Results[i]); 262 } 263 264 return Results[Op.getResNo()]; 265 } 266 267 SDValue VectorLegalizer::LegalizeOp(SDValue Op) { 268 // Note that LegalizeOp may be reentered even from single-use nodes, which 269 // means that we always must cache transformed nodes. 270 DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op); 271 if (I != LegalizedNodes.end()) return I->second; 272 273 // Legalize the operands 274 SmallVector<SDValue, 8> Ops; 275 for (const SDValue &Oper : Op->op_values()) 276 Ops.push_back(LegalizeOp(Oper)); 277 278 SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops); 279 280 bool HasVectorValueOrOp = 281 llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) || 282 llvm::any_of(Node->op_values(), 283 [](SDValue O) { return O.getValueType().isVector(); }); 284 if (!HasVectorValueOrOp) 285 return TranslateLegalizeResults(Op, Node); 286 287 TargetLowering::LegalizeAction Action = TargetLowering::Legal; 288 EVT ValVT; 289 switch (Op.getOpcode()) { 290 default: 291 return TranslateLegalizeResults(Op, Node); 292 case ISD::LOAD: { 293 LoadSDNode *LD = cast<LoadSDNode>(Node); 294 ISD::LoadExtType ExtType = LD->getExtensionType(); 295 EVT LoadedVT = LD->getMemoryVT(); 296 if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD) 297 Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT); 298 break; 299 } 300 case ISD::STORE: { 301 StoreSDNode *ST = cast<StoreSDNode>(Node); 302 EVT StVT = ST->getMemoryVT(); 303 MVT ValVT = ST->getValue().getSimpleValueType(); 304 if (StVT.isVector() && ST->isTruncatingStore()) 305 Action = TLI.getTruncStoreAction(ValVT, StVT); 306 break; 307 } 308 case ISD::MERGE_VALUES: 309 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); 310 // This operation lies about being legal: when it claims to be legal, 311 // it should actually be 
expanded. 312 if (Action == TargetLowering::Legal) 313 Action = TargetLowering::Expand; 314 break; 315 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 316 case ISD::STRICT_##DAGN: 317 #include "llvm/IR/ConstrainedOps.def" 318 ValVT = Node->getValueType(0); 319 if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP || 320 Op.getOpcode() == ISD::STRICT_UINT_TO_FP) 321 ValVT = Node->getOperand(1).getValueType(); 322 if (Op.getOpcode() == ISD::STRICT_FSETCC || 323 Op.getOpcode() == ISD::STRICT_FSETCCS) { 324 MVT OpVT = Node->getOperand(1).getSimpleValueType(); 325 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get(); 326 Action = TLI.getCondCodeAction(CCCode, OpVT); 327 if (Action == TargetLowering::Legal) 328 Action = TLI.getOperationAction(Node->getOpcode(), OpVT); 329 } else { 330 Action = TLI.getOperationAction(Node->getOpcode(), ValVT); 331 } 332 // If we're asked to expand a strict vector floating-point operation, 333 // by default we're going to simply unroll it. That is usually the 334 // best approach, except in the case where the resulting strict (scalar) 335 // operations would themselves use the fallback mutation to non-strict. 336 // In that specific case, just do the fallback on the vector op. 
337 if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() && 338 TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) == 339 TargetLowering::Legal) { 340 EVT EltVT = ValVT.getVectorElementType(); 341 if (TLI.getOperationAction(Node->getOpcode(), EltVT) 342 == TargetLowering::Expand && 343 TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT) 344 == TargetLowering::Legal) 345 Action = TargetLowering::Legal; 346 } 347 break; 348 case ISD::ADD: 349 case ISD::SUB: 350 case ISD::MUL: 351 case ISD::MULHS: 352 case ISD::MULHU: 353 case ISD::SDIV: 354 case ISD::UDIV: 355 case ISD::SREM: 356 case ISD::UREM: 357 case ISD::SDIVREM: 358 case ISD::UDIVREM: 359 case ISD::FADD: 360 case ISD::FSUB: 361 case ISD::FMUL: 362 case ISD::FDIV: 363 case ISD::FREM: 364 case ISD::AND: 365 case ISD::OR: 366 case ISD::XOR: 367 case ISD::SHL: 368 case ISD::SRA: 369 case ISD::SRL: 370 case ISD::FSHL: 371 case ISD::FSHR: 372 case ISD::ROTL: 373 case ISD::ROTR: 374 case ISD::ABS: 375 case ISD::ABDS: 376 case ISD::ABDU: 377 case ISD::AVGCEILS: 378 case ISD::AVGCEILU: 379 case ISD::AVGFLOORS: 380 case ISD::AVGFLOORU: 381 case ISD::BSWAP: 382 case ISD::BITREVERSE: 383 case ISD::CTLZ: 384 case ISD::CTTZ: 385 case ISD::CTLZ_ZERO_UNDEF: 386 case ISD::CTTZ_ZERO_UNDEF: 387 case ISD::CTPOP: 388 case ISD::SELECT: 389 case ISD::VSELECT: 390 case ISD::SELECT_CC: 391 case ISD::ZERO_EXTEND: 392 case ISD::ANY_EXTEND: 393 case ISD::TRUNCATE: 394 case ISD::SIGN_EXTEND: 395 case ISD::FP_TO_SINT: 396 case ISD::FP_TO_UINT: 397 case ISD::FNEG: 398 case ISD::FABS: 399 case ISD::FMINNUM: 400 case ISD::FMAXNUM: 401 case ISD::FMINNUM_IEEE: 402 case ISD::FMAXNUM_IEEE: 403 case ISD::FMINIMUM: 404 case ISD::FMAXIMUM: 405 case ISD::FMINIMUMNUM: 406 case ISD::FMAXIMUMNUM: 407 case ISD::FCOPYSIGN: 408 case ISD::FSQRT: 409 case ISD::FSIN: 410 case ISD::FCOS: 411 case ISD::FTAN: 412 case ISD::FASIN: 413 case ISD::FACOS: 414 case ISD::FATAN: 415 case ISD::FATAN2: 416 case ISD::FSINH: 417 case ISD::FCOSH: 418 
case ISD::FTANH: 419 case ISD::FLDEXP: 420 case ISD::FPOWI: 421 case ISD::FPOW: 422 case ISD::FLOG: 423 case ISD::FLOG2: 424 case ISD::FLOG10: 425 case ISD::FEXP: 426 case ISD::FEXP2: 427 case ISD::FEXP10: 428 case ISD::FCEIL: 429 case ISD::FTRUNC: 430 case ISD::FRINT: 431 case ISD::FNEARBYINT: 432 case ISD::FROUND: 433 case ISD::FROUNDEVEN: 434 case ISD::FFLOOR: 435 case ISD::FP_ROUND: 436 case ISD::FP_EXTEND: 437 case ISD::FPTRUNC_ROUND: 438 case ISD::FMA: 439 case ISD::SIGN_EXTEND_INREG: 440 case ISD::ANY_EXTEND_VECTOR_INREG: 441 case ISD::SIGN_EXTEND_VECTOR_INREG: 442 case ISD::ZERO_EXTEND_VECTOR_INREG: 443 case ISD::SMIN: 444 case ISD::SMAX: 445 case ISD::UMIN: 446 case ISD::UMAX: 447 case ISD::SMUL_LOHI: 448 case ISD::UMUL_LOHI: 449 case ISD::SADDO: 450 case ISD::UADDO: 451 case ISD::SSUBO: 452 case ISD::USUBO: 453 case ISD::SMULO: 454 case ISD::UMULO: 455 case ISD::FCANONICALIZE: 456 case ISD::FFREXP: 457 case ISD::FSINCOS: 458 case ISD::SADDSAT: 459 case ISD::UADDSAT: 460 case ISD::SSUBSAT: 461 case ISD::USUBSAT: 462 case ISD::SSHLSAT: 463 case ISD::USHLSAT: 464 case ISD::FP_TO_SINT_SAT: 465 case ISD::FP_TO_UINT_SAT: 466 case ISD::MGATHER: 467 case ISD::VECTOR_COMPRESS: 468 case ISD::SCMP: 469 case ISD::UCMP: 470 Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); 471 break; 472 case ISD::SMULFIX: 473 case ISD::SMULFIXSAT: 474 case ISD::UMULFIX: 475 case ISD::UMULFIXSAT: 476 case ISD::SDIVFIX: 477 case ISD::SDIVFIXSAT: 478 case ISD::UDIVFIX: 479 case ISD::UDIVFIXSAT: { 480 unsigned Scale = Node->getConstantOperandVal(2); 481 Action = TLI.getFixedPointOperationAction(Node->getOpcode(), 482 Node->getValueType(0), Scale); 483 break; 484 } 485 case ISD::LROUND: 486 case ISD::LLROUND: 487 case ISD::LRINT: 488 case ISD::LLRINT: 489 case ISD::SINT_TO_FP: 490 case ISD::UINT_TO_FP: 491 case ISD::VECREDUCE_ADD: 492 case ISD::VECREDUCE_MUL: 493 case ISD::VECREDUCE_AND: 494 case ISD::VECREDUCE_OR: 495 case ISD::VECREDUCE_XOR: 496 case 
ISD::VECREDUCE_SMAX: 497 case ISD::VECREDUCE_SMIN: 498 case ISD::VECREDUCE_UMAX: 499 case ISD::VECREDUCE_UMIN: 500 case ISD::VECREDUCE_FADD: 501 case ISD::VECREDUCE_FMUL: 502 case ISD::VECREDUCE_FMAX: 503 case ISD::VECREDUCE_FMIN: 504 case ISD::VECREDUCE_FMAXIMUM: 505 case ISD::VECREDUCE_FMINIMUM: 506 case ISD::VECTOR_FIND_LAST_ACTIVE: 507 Action = TLI.getOperationAction(Node->getOpcode(), 508 Node->getOperand(0).getValueType()); 509 break; 510 case ISD::VECREDUCE_SEQ_FADD: 511 case ISD::VECREDUCE_SEQ_FMUL: 512 Action = TLI.getOperationAction(Node->getOpcode(), 513 Node->getOperand(1).getValueType()); 514 break; 515 case ISD::SETCC: { 516 MVT OpVT = Node->getOperand(0).getSimpleValueType(); 517 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); 518 Action = TLI.getCondCodeAction(CCCode, OpVT); 519 if (Action == TargetLowering::Legal) 520 Action = TLI.getOperationAction(Node->getOpcode(), OpVT); 521 break; 522 } 523 524 #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...) \ 525 case ISD::VPID: { \ 526 EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS)) \ 527 : Node->getOperand(LEGALPOS).getValueType(); \ 528 if (ISD::VPID == ISD::VP_SETCC) { \ 529 ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \ 530 Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT()); \ 531 if (Action != TargetLowering::Legal) \ 532 break; \ 533 } \ 534 /* Defer non-vector results to LegalizeDAG. 
*/ \ 535 if (!Node->getValueType(0).isVector() && \ 536 Node->getValueType(0) != MVT::Other) { \ 537 Action = TargetLowering::Legal; \ 538 break; \ 539 } \ 540 Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT); \ 541 } break; 542 #include "llvm/IR/VPIntrinsics.def" 543 } 544 545 LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG)); 546 547 SmallVector<SDValue, 8> ResultVals; 548 switch (Action) { 549 default: llvm_unreachable("This action is not supported yet!"); 550 case TargetLowering::Promote: 551 assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) && 552 "This action is not supported yet!"); 553 LLVM_DEBUG(dbgs() << "Promoting\n"); 554 Promote(Node, ResultVals); 555 assert(!ResultVals.empty() && "No results for promotion?"); 556 break; 557 case TargetLowering::Legal: 558 LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n"); 559 break; 560 case TargetLowering::Custom: 561 LLVM_DEBUG(dbgs() << "Trying custom legalization\n"); 562 if (LowerOperationWrapper(Node, ResultVals)) 563 break; 564 LLVM_DEBUG(dbgs() << "Could not custom legalize node\n"); 565 [[fallthrough]]; 566 case TargetLowering::Expand: 567 LLVM_DEBUG(dbgs() << "Expanding\n"); 568 Expand(Node, ResultVals); 569 break; 570 } 571 572 if (ResultVals.empty()) 573 return TranslateLegalizeResults(Op, Node); 574 575 Changed = true; 576 return RecursivelyLegalizeResults(Op, ResultVals); 577 } 578 579 // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we 580 // merge them somehow? 581 bool VectorLegalizer::LowerOperationWrapper(SDNode *Node, 582 SmallVectorImpl<SDValue> &Results) { 583 SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG); 584 585 if (!Res.getNode()) 586 return false; 587 588 if (Res == SDValue(Node, 0)) 589 return true; 590 591 // If the original node has one result, take the return value from 592 // LowerOperation as is. It might not be result number 0. 
593 if (Node->getNumValues() == 1) { 594 Results.push_back(Res); 595 return true; 596 } 597 598 // If the original node has multiple results, then the return node should 599 // have the same number of results. 600 assert((Node->getNumValues() == Res->getNumValues()) && 601 "Lowering returned the wrong number of results!"); 602 603 // Places new result values base on N result number. 604 for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I) 605 Results.push_back(Res.getValue(I)); 606 607 return true; 608 } 609 610 void VectorLegalizer::PromoteSETCC(SDNode *Node, 611 SmallVectorImpl<SDValue> &Results) { 612 MVT VecVT = Node->getOperand(0).getSimpleValueType(); 613 MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); 614 615 unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND; 616 617 SDLoc DL(Node); 618 SmallVector<SDValue, 5> Operands(Node->getNumOperands()); 619 620 Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0)); 621 Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1)); 622 Operands[2] = Node->getOperand(2); 623 624 if (Node->getOpcode() == ISD::VP_SETCC) { 625 Operands[3] = Node->getOperand(3); // mask 626 Operands[4] = Node->getOperand(4); // evl 627 } 628 629 SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0), 630 Operands, Node->getFlags()); 631 632 Results.push_back(Res); 633 } 634 635 void VectorLegalizer::PromoteSTRICT(SDNode *Node, 636 SmallVectorImpl<SDValue> &Results) { 637 MVT VecVT = Node->getOperand(1).getSimpleValueType(); 638 MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); 639 640 assert(VecVT.isFloatingPoint()); 641 642 SDLoc DL(Node); 643 SmallVector<SDValue, 5> Operands(Node->getNumOperands()); 644 SmallVector<SDValue, 2> Chains; 645 646 for (unsigned j = 1; j != Node->getNumOperands(); ++j) 647 if (Node->getOperand(j).getValueType().isVector() && 648 !(ISD::isVPOpcode(Node->getOpcode()) && 649 
ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand. 650 { 651 // promote the vector operand. 652 SDValue Ext = 653 DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other}, 654 {Node->getOperand(0), Node->getOperand(j)}); 655 Operands[j] = Ext.getValue(0); 656 Chains.push_back(Ext.getValue(1)); 657 } else 658 Operands[j] = Node->getOperand(j); // Skip no vector operand. 659 660 SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1)); 661 662 Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 663 664 SDValue Res = 665 DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags()); 666 667 SDValue Round = 668 DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other}, 669 {Res.getValue(1), Res.getValue(0), 670 DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)}); 671 672 Results.push_back(Round.getValue(0)); 673 Results.push_back(Round.getValue(1)); 674 } 675 676 void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) { 677 // For a few operations there is a specific concept for promotion based on 678 // the operand's type. 679 switch (Node->getOpcode()) { 680 case ISD::SINT_TO_FP: 681 case ISD::UINT_TO_FP: 682 case ISD::STRICT_SINT_TO_FP: 683 case ISD::STRICT_UINT_TO_FP: 684 // "Promote" the operation by extending the operand. 685 PromoteINT_TO_FP(Node, Results); 686 return; 687 case ISD::FP_TO_UINT: 688 case ISD::FP_TO_SINT: 689 case ISD::STRICT_FP_TO_UINT: 690 case ISD::STRICT_FP_TO_SINT: 691 // Promote the operation by extending the operand. 692 PromoteFP_TO_INT(Node, Results); 693 return; 694 case ISD::VP_SETCC: 695 case ISD::SETCC: 696 // Promote the operation by extending the operand. 
697 PromoteSETCC(Node, Results); 698 return; 699 case ISD::STRICT_FADD: 700 case ISD::STRICT_FSUB: 701 case ISD::STRICT_FMUL: 702 case ISD::STRICT_FDIV: 703 case ISD::STRICT_FSQRT: 704 case ISD::STRICT_FMA: 705 PromoteSTRICT(Node, Results); 706 return; 707 case ISD::FP_ROUND: 708 case ISD::FP_EXTEND: 709 // These operations are used to do promotion so they can't be promoted 710 // themselves. 711 llvm_unreachable("Don't know how to promote this operation!"); 712 case ISD::VP_FABS: 713 case ISD::VP_FCOPYSIGN: 714 case ISD::VP_FNEG: 715 // Promoting fabs, fneg, and fcopysign changes their semantics. 716 llvm_unreachable("These operations should not be promoted"); 717 } 718 719 // There are currently two cases of vector promotion: 720 // 1) Bitcasting a vector of integers to a different type to a vector of the 721 // same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64. 722 // 2) Extending a vector of floats to a vector of the same number of larger 723 // floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32. 724 assert(Node->getNumValues() == 1 && 725 "Can't promote a vector with multiple results!"); 726 MVT VT = Node->getSimpleValueType(0); 727 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); 728 SDLoc dl(Node); 729 SmallVector<SDValue, 4> Operands(Node->getNumOperands()); 730 731 for (unsigned j = 0; j != Node->getNumOperands(); ++j) { 732 // Do not promote the mask operand of a VP OP. 
733 bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) && 734 ISD::getVPMaskIdx(Node->getOpcode()) == j; 735 if (Node->getOperand(j).getValueType().isVector() && !SkipPromote) 736 if (Node->getOperand(j) 737 .getValueType() 738 .getVectorElementType() 739 .isFloatingPoint() && 740 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()) 741 if (ISD::isVPOpcode(Node->getOpcode())) { 742 unsigned EVLIdx = 743 *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode()); 744 unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode()); 745 Operands[j] = 746 DAG.getNode(ISD::VP_FP_EXTEND, dl, NVT, Node->getOperand(j), 747 Node->getOperand(MaskIdx), Node->getOperand(EVLIdx)); 748 } else { 749 Operands[j] = 750 DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j)); 751 } 752 else 753 Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j)); 754 else 755 Operands[j] = Node->getOperand(j); 756 } 757 758 SDValue Res = 759 DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags()); 760 761 if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) || 762 (VT.isVector() && VT.getVectorElementType().isFloatingPoint() && 763 NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())) 764 if (ISD::isVPOpcode(Node->getOpcode())) { 765 unsigned EVLIdx = *ISD::getVPExplicitVectorLengthIdx(Node->getOpcode()); 766 unsigned MaskIdx = *ISD::getVPMaskIdx(Node->getOpcode()); 767 Res = DAG.getNode(ISD::VP_FP_ROUND, dl, VT, Res, 768 Node->getOperand(MaskIdx), Node->getOperand(EVLIdx)); 769 } else { 770 Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res, 771 DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)); 772 } 773 else 774 Res = DAG.getNode(ISD::BITCAST, dl, VT, Res); 775 776 Results.push_back(Res); 777 } 778 779 void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node, 780 SmallVectorImpl<SDValue> &Results) { 781 // INT_TO_FP operations may require the input operand be promoted even 782 // when the type is otherwise legal. 
783 bool IsStrict = Node->isStrictFPOpcode(); 784 MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType(); 785 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); 786 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && 787 "Vectors have different number of elements!"); 788 789 SDLoc dl(Node); 790 SmallVector<SDValue, 4> Operands(Node->getNumOperands()); 791 792 unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP || 793 Node->getOpcode() == ISD::STRICT_UINT_TO_FP) 794 ? ISD::ZERO_EXTEND 795 : ISD::SIGN_EXTEND; 796 for (unsigned j = 0; j != Node->getNumOperands(); ++j) { 797 if (Node->getOperand(j).getValueType().isVector()) 798 Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j)); 799 else 800 Operands[j] = Node->getOperand(j); 801 } 802 803 if (IsStrict) { 804 SDValue Res = DAG.getNode(Node->getOpcode(), dl, 805 {Node->getValueType(0), MVT::Other}, Operands); 806 Results.push_back(Res); 807 Results.push_back(Res.getValue(1)); 808 return; 809 } 810 811 SDValue Res = 812 DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands); 813 Results.push_back(Res); 814 } 815 816 // For FP_TO_INT we promote the result type to a vector type with wider 817 // elements and then truncate the result. This is different from the default 818 // PromoteVector which uses bitcast to promote thus assumning that the 819 // promoted vector type has the same overall size. 820 void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node, 821 SmallVectorImpl<SDValue> &Results) { 822 MVT VT = Node->getSimpleValueType(0); 823 MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); 824 bool IsStrict = Node->isStrictFPOpcode(); 825 assert(NVT.getVectorNumElements() == VT.getVectorNumElements() && 826 "Vectors have different number of elements!"); 827 828 unsigned NewOpc = Node->getOpcode(); 829 // Change FP_TO_UINT to FP_TO_SINT if possible. 830 // TODO: Should we only do this if FP_TO_UINT itself isn't legal? 
831 if (NewOpc == ISD::FP_TO_UINT && 832 TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT)) 833 NewOpc = ISD::FP_TO_SINT; 834 835 if (NewOpc == ISD::STRICT_FP_TO_UINT && 836 TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT)) 837 NewOpc = ISD::STRICT_FP_TO_SINT; 838 839 SDLoc dl(Node); 840 SDValue Promoted, Chain; 841 if (IsStrict) { 842 Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other}, 843 {Node->getOperand(0), Node->getOperand(1)}); 844 Chain = Promoted.getValue(1); 845 } else 846 Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0)); 847 848 // Assert that the converted value fits in the original type. If it doesn't 849 // (eg: because the value being converted is too big), then the result of the 850 // original operation was undefined anyway, so the assert is still correct. 851 if (Node->getOpcode() == ISD::FP_TO_UINT || 852 Node->getOpcode() == ISD::STRICT_FP_TO_UINT) 853 NewOpc = ISD::AssertZext; 854 else 855 NewOpc = ISD::AssertSext; 856 857 Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted, 858 DAG.getValueType(VT.getScalarType())); 859 Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted); 860 Results.push_back(Promoted); 861 if (IsStrict) 862 Results.push_back(Chain); 863 } 864 865 std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) { 866 LoadSDNode *LD = cast<LoadSDNode>(N); 867 return TLI.scalarizeVectorLoad(LD, DAG); 868 } 869 870 SDValue VectorLegalizer::ExpandStore(SDNode *N) { 871 StoreSDNode *ST = cast<StoreSDNode>(N); 872 SDValue TF = TLI.scalarizeVectorStore(ST, DAG); 873 return TF; 874 } 875 876 void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) { 877 switch (Node->getOpcode()) { 878 case ISD::LOAD: { 879 std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node); 880 Results.push_back(Tmp.first); 881 Results.push_back(Tmp.second); 882 return; 883 } 884 case ISD::STORE: 885 Results.push_back(ExpandStore(Node)); 886 return; 887 case ISD::MERGE_VALUES: 888 for (unsigned i = 0, e = 
/// Expands \p Node and appends the replacement value(s) to \p Results.
///
/// The switch first tries an opcode-specific expansion. A case that succeeds
/// pushes its result(s) and returns. A case whose helper yields a null SDValue
/// (or that deliberately does nothing) breaks out of the switch, and the node
/// is then unrolled with SelectionDAG::UnrollVectorOp at the bottom of the
/// function. Opcodes without a case take the same unrolling path.
void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
  switch (Node->getOpcode()) {
  case ISD::LOAD: {
    // Both members of the pair returned by ExpandLoad become results.
    std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
    Results.push_back(Tmp.first);
    Results.push_back(Tmp.second);
    return;
  }
  case ISD::STORE:
    Results.push_back(ExpandStore(Node));
    return;
  case ISD::MERGE_VALUES:
    // Result i of a MERGE_VALUES is simply its operand i.
    for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
      Results.push_back(Node->getOperand(i));
    return;
  case ISD::SIGN_EXTEND_INREG:
    if (SDValue Expanded = ExpandSEXTINREG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ANY_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
    return;
  case ISD::BSWAP:
    if (SDValue Expanded = ExpandBSWAP(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_BSWAP:
    Results.push_back(TLI.expandVPBSWAP(Node, DAG));
    return;
  case ISD::VSELECT:
    if (SDValue Expanded = ExpandVSELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_SELECT:
    if (SDValue Expanded = ExpandVP_SELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_SREM:
  case ISD::VP_UREM:
    if (SDValue Expanded = ExpandVP_REM(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FNEG:
    if (SDValue Expanded = ExpandVP_FNEG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FABS:
    if (SDValue Expanded = ExpandVP_FABS(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_FCOPYSIGN:
    if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SELECT:
    if (SDValue Expanded = ExpandSELECT(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SELECT_CC: {
    // For scalable vectors, split SELECT_CC into an explicit SETCC (operands
    // 0 and 1 compared with the condition code in operand 4) feeding a select
    // between operands 2 and 3. Fixed-length vectors fall through to the
    // unroll at the bottom.
    if (Node->getValueType(0).isScalableVector()) {
      EVT CondVT = TLI.getSetCCResultType(
          DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
      SDValue SetCC =
          DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
                      Node->getOperand(1), Node->getOperand(4));
      Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
                                      Node->getOperand(2),
                                      Node->getOperand(3)));
      return;
    }
    break;
  }
  case ISD::FP_TO_UINT:
    ExpandFP_TO_UINT(Node, Results);
    return;
  case ISD::UINT_TO_FP:
    ExpandUINT_TO_FLOAT(Node, Results);
    return;
  case ISD::FNEG:
    if (SDValue Expanded = ExpandFNEG(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FABS:
    if (SDValue Expanded = ExpandFABS(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FCOPYSIGN:
    if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FSUB:
    ExpandFSUB(Node, Results);
    return;
  case ISD::SETCC:
  case ISD::VP_SETCC:
    ExpandSETCC(Node, Results);
    return;
  case ISD::ABS:
    if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ABDS:
  case ISD::ABDU:
    if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::AVGCEILS:
  case ISD::AVGCEILU:
  case ISD::AVGFLOORS:
  case ISD::AVGFLOORU:
    if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::BITREVERSE:
    if (SDValue Expanded = ExpandBITREVERSE(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_BITREVERSE:
    if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTPOP:
    if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTPOP:
    if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTLZ:
  case ISD::VP_CTLZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::VP_CTTZ:
  case ISD::VP_CTTZ_ZERO_UNDEF:
    if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FSHL:
  case ISD::VP_FSHL:
  case ISD::FSHR:
  case ISD::VP_FSHR:
    if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
    if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FMINIMUM:
  case ISD::FMAXIMUM:
    // Unlike most cases above, the result is pushed without a null check.
    Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
    return;
  case ISD::FMINIMUMNUM:
  case ISD::FMAXIMUMNUM:
    Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
    return;
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:
    if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::UADDO:
  case ISD::USUBO:
    ExpandUADDSUBO(Node, Results);
    return;
  case ISD::SADDO:
  case ISD::SSUBO:
    ExpandSADDSUBO(Node, Results);
    return;
  case ISD::UMULO:
  case ISD::SMULO:
    ExpandMULO(Node, Results);
    return;
  case ISD::USUBSAT:
  case ISD::SSUBSAT:
  case ISD::UADDSAT:
  case ISD::SADDSAT:
    if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::USHLSAT:
  case ISD::SSHLSAT:
    if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT:
    // Expand the saturating fp-to-int conversion if it is scalable, to prevent
    // it from being unrolled below.
    if (Node->getValueType(0).isScalableVector()) {
      if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
        Results.push_back(Expanded);
        return;
      }
    }
    break;
  case ISD::SMULFIX:
  case ISD::UMULFIX:
    if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::SMULFIXSAT:
  case ISD::UMULFIXSAT:
    // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
    // why. Maybe it results in worse codegen compared to the unroll for some
    // targets? This should probably be investigated. And if we still prefer to
    // unroll an explanation could be helpful.
    break;
  case ISD::SDIVFIX:
  case ISD::UDIVFIX:
    ExpandFixedPointDiv(Node, Results);
    return;
  case ISD::SDIVFIXSAT:
  case ISD::UDIVFIXSAT:
    // No dedicated expansion here; unrolled below.
    break;
// The macro/include pair below emits one `case ISD::STRICT_<op>:` label for
// each DAG_INSTRUCTION entry in ConstrainedOps.def; all of those labels share
// the ExpandStrictFPOp handler that follows.
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
  case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
    ExpandStrictFPOp(Node, Results);
    return;
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_MUL:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_UMIN:
  case ISD::VECREDUCE_FADD:
  case ISD::VECREDUCE_FMUL:
  case ISD::VECREDUCE_FMAX:
  case ISD::VECREDUCE_FMIN:
  case ISD::VECREDUCE_FMAXIMUM:
  case ISD::VECREDUCE_FMINIMUM:
    Results.push_back(TLI.expandVecReduce(Node, DAG));
    return;
  case ISD::VECREDUCE_SEQ_FADD:
  case ISD::VECREDUCE_SEQ_FMUL:
    Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
    return;
  case ISD::SREM:
  case ISD::UREM:
    ExpandREM(Node, Results);
    return;
  case ISD::VP_MERGE:
    if (SDValue Expanded = ExpandVP_MERGE(Node)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  case ISD::FREM:
    // Try a vector math library call first; unroll if that is not possible.
    if (tryExpandVecMathCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
                             RTLIB::REM_F80, RTLIB::REM_F128,
                             RTLIB::REM_PPCF128, Results))
      return;

    break;
  case ISD::FSINCOS: {
    // The libcall is looked up from the vector's element type.
    RTLIB::Libcall LC =
        RTLIB::getFSINCOS(Node->getValueType(0).getVectorElementType());
    if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
      return;
    break;
  }
  case ISD::VECTOR_COMPRESS:
    Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
    return;
  case ISD::VECTOR_FIND_LAST_ACTIVE:
    Results.push_back(TLI.expandVectorFindLastActive(Node, DAG));
    return;
  case ISD::SCMP:
  case ISD::UCMP:
    Results.push_back(TLI.expandCMP(Node, DAG));
    return;

  case ISD::FADD:
  case ISD::FMUL:
  case ISD::FMA:
  case ISD::FDIV:
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FNEARBYINT:
  case ISD::FRINT:
  case ISD::FROUND:
  case ISD::FROUNDEVEN:
  case ISD::FTRUNC:
  case ISD::FSQRT:
    if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
      Results.push_back(Expanded);
      return;
    }
    break;
  }

  // Fallback for everything that broke out of (or was not handled by) the
  // switch: unroll the vector operation and forward every result value of the
  // unrolled node.
  SDValue Unrolled = DAG.UnrollVectorOp(Node);
  if (Node->getNumValues() == 1) {
    Results.push_back(Unrolled);
  } else {
    assert(Node->getNumValues() == Unrolled->getNumValues() &&
      "VectorLegalizer Expand returned wrong number of results!");
    for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
      Results.push_back(Unrolled.getValue(I));
  }
}
1289 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || 1290 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || 1291 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || 1292 TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR 1293 : ISD::SPLAT_VECTOR, 1294 VT) == TargetLowering::Expand) 1295 return SDValue(); 1296 1297 // Generate a mask operand. 1298 EVT MaskTy = VT.changeVectorElementTypeToInteger(); 1299 1300 // What is the size of each element in the vector mask. 1301 EVT BitTy = MaskTy.getScalarType(); 1302 1303 Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy), 1304 DAG.getConstant(0, DL, BitTy)); 1305 1306 // Broadcast the mask so that the entire vector is all one or all zero. 1307 Mask = DAG.getSplat(MaskTy, DL, Mask); 1308 1309 // Bitcast the operands to be the same type as the mask. 1310 // This is needed when we select between FP types because 1311 // the mask is a vector of integers. 1312 Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1); 1313 Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2); 1314 1315 SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy); 1316 1317 Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask); 1318 Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask); 1319 SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2); 1320 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); 1321 } 1322 1323 SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) { 1324 EVT VT = Node->getValueType(0); 1325 1326 // Make sure that the SRA and SHL instructions are available. 
1327 if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || 1328 TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) 1329 return SDValue(); 1330 1331 SDLoc DL(Node); 1332 EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT(); 1333 1334 unsigned BW = VT.getScalarSizeInBits(); 1335 unsigned OrigBW = OrigTy.getScalarSizeInBits(); 1336 SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT); 1337 1338 SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz); 1339 return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); 1340 } 1341 1342 // Generically expand a vector anyext in register to a shuffle of the relevant 1343 // lanes into the appropriate locations, with other lanes left undef. 1344 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) { 1345 SDLoc DL(Node); 1346 EVT VT = Node->getValueType(0); 1347 int NumElements = VT.getVectorNumElements(); 1348 SDValue Src = Node->getOperand(0); 1349 EVT SrcVT = Src.getValueType(); 1350 int NumSrcElements = SrcVT.getVectorNumElements(); 1351 1352 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector 1353 // into a larger vector type. 1354 if (SrcVT.bitsLE(VT)) { 1355 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && 1356 "ANY_EXTEND_VECTOR_INREG vector size mismatch"); 1357 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); 1358 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), 1359 NumSrcElements); 1360 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), 1361 Src, DAG.getVectorIdxConstant(0, DL)); 1362 } 1363 1364 // Build a base mask of undef shuffles. 1365 SmallVector<int, 16> ShuffleMask; 1366 ShuffleMask.resize(NumSrcElements, -1); 1367 1368 // Place the extended lanes into the correct locations. 1369 int ExtLaneScale = NumSrcElements / NumElements; 1370 int EndianOffset = DAG.getDataLayout().isBigEndian() ? 
ExtLaneScale - 1 : 0; 1371 for (int i = 0; i < NumElements; ++i) 1372 ShuffleMask[i * ExtLaneScale + EndianOffset] = i; 1373 1374 return DAG.getNode( 1375 ISD::BITCAST, DL, VT, 1376 DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); 1377 } 1378 1379 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) { 1380 SDLoc DL(Node); 1381 EVT VT = Node->getValueType(0); 1382 SDValue Src = Node->getOperand(0); 1383 EVT SrcVT = Src.getValueType(); 1384 1385 // First build an any-extend node which can be legalized above when we 1386 // recurse through it. 1387 SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src); 1388 1389 // Now we need sign extend. Do this by shifting the elements. Even if these 1390 // aren't legal operations, they have a better chance of being legalized 1391 // without full scalarization than the sign extension does. 1392 unsigned EltWidth = VT.getScalarSizeInBits(); 1393 unsigned SrcEltWidth = SrcVT.getScalarSizeInBits(); 1394 SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT); 1395 return DAG.getNode(ISD::SRA, DL, VT, 1396 DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), 1397 ShiftAmount); 1398 } 1399 1400 // Generically expand a vector zext in register to a shuffle of the relevant 1401 // lanes into the appropriate locations, a blend of zero into the high bits, 1402 // and a bitcast to the wider element type. 1403 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) { 1404 SDLoc DL(Node); 1405 EVT VT = Node->getValueType(0); 1406 int NumElements = VT.getVectorNumElements(); 1407 SDValue Src = Node->getOperand(0); 1408 EVT SrcVT = Src.getValueType(); 1409 int NumSrcElements = SrcVT.getVectorNumElements(); 1410 1411 // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector 1412 // into a larger vector type. 
1413 if (SrcVT.bitsLE(VT)) { 1414 assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && 1415 "ZERO_EXTEND_VECTOR_INREG vector size mismatch"); 1416 NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits(); 1417 SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(), 1418 NumSrcElements); 1419 Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), 1420 Src, DAG.getVectorIdxConstant(0, DL)); 1421 } 1422 1423 // Build up a zero vector to blend into this one. 1424 SDValue Zero = DAG.getConstant(0, DL, SrcVT); 1425 1426 // Shuffle the incoming lanes into the correct position, and pull all other 1427 // lanes from the zero vector. 1428 auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements)); 1429 1430 int ExtLaneScale = NumSrcElements / NumElements; 1431 int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0; 1432 for (int i = 0; i < NumElements; ++i) 1433 ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; 1434 1435 return DAG.getNode(ISD::BITCAST, DL, VT, 1436 DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); 1437 } 1438 1439 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) { 1440 int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; 1441 for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I) 1442 for (int J = ScalarSizeInBytes - 1; J >= 0; --J) 1443 ShuffleMask.push_back((I * ScalarSizeInBytes) + J); 1444 } 1445 1446 SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) { 1447 EVT VT = Node->getValueType(0); 1448 1449 // Scalable vectors can't use shuffle expansion. 1450 if (VT.isScalableVector()) 1451 return TLI.expandBSWAP(Node, DAG); 1452 1453 // Generate a byte wise shuffle mask for the BSWAP. 1454 SmallVector<int, 16> ShuffleMask; 1455 createBSWAPShuffleMask(VT, ShuffleMask); 1456 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size()); 1457 1458 // Only emit a shuffle if the mask is legal. 
1459 if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) { 1460 SDLoc DL(Node); 1461 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); 1462 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask); 1463 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 1464 } 1465 1466 // If we have the appropriate vector bit operations, it is better to use them 1467 // than unrolling and expanding each component. 1468 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && 1469 TLI.isOperationLegalOrCustom(ISD::SRL, VT) && 1470 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && 1471 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) 1472 return TLI.expandBSWAP(Node, DAG); 1473 1474 // Otherwise let the caller unroll. 1475 return SDValue(); 1476 } 1477 1478 SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) { 1479 EVT VT = Node->getValueType(0); 1480 1481 // We can't unroll or use shuffles for scalable vectors. 1482 if (VT.isScalableVector()) 1483 return TLI.expandBITREVERSE(Node, DAG); 1484 1485 // If we have the scalar operation, it's probably cheaper to unroll it. 1486 if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType())) 1487 return SDValue(); 1488 1489 // If the vector element width is a whole number of bytes, test if its legal 1490 // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte 1491 // vector. This greatly reduces the number of bit shifts necessary. 
1492 unsigned ScalarSizeInBits = VT.getScalarSizeInBits(); 1493 if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) { 1494 SmallVector<int, 16> BSWAPMask; 1495 createBSWAPShuffleMask(VT, BSWAPMask); 1496 1497 EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size()); 1498 if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) && 1499 (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) || 1500 (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) && 1501 TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) && 1502 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) && 1503 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) { 1504 SDLoc DL(Node); 1505 SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0)); 1506 Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), 1507 BSWAPMask); 1508 Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op); 1509 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 1510 return Op; 1511 } 1512 } 1513 1514 // If we have the appropriate vector bit operations, it is better to use them 1515 // than unrolling and expanding each component. 1516 if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) && 1517 TLI.isOperationLegalOrCustom(ISD::SRL, VT) && 1518 TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) && 1519 TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT)) 1520 return TLI.expandBITREVERSE(Node, DAG); 1521 1522 // Otherwise unroll. 1523 return SDValue(); 1524 } 1525 1526 SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) { 1527 // Implement VSELECT in terms of XOR, AND, OR 1528 // on platforms which do not support blend natively. 1529 SDLoc DL(Node); 1530 1531 SDValue Mask = Node->getOperand(0); 1532 SDValue Op1 = Node->getOperand(1); 1533 SDValue Op2 = Node->getOperand(2); 1534 1535 EVT VT = Mask.getValueType(); 1536 1537 // If we can't even use the basic vector operations of 1538 // AND,OR,XOR, we will have to scalarize the op. 
1539 // Notice that the operation may be 'promoted' which means that it is 1540 // 'bitcasted' to another type which is handled. 1541 if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || 1542 TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || 1543 TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) 1544 return SDValue(); 1545 1546 // This operation also isn't safe with AND, OR, XOR when the boolean type is 1547 // 0/1 and the select operands aren't also booleans, as we need an all-ones 1548 // vector constant to mask with. 1549 // FIXME: Sign extend 1 to all ones if that's legal on the target. 1550 auto BoolContents = TLI.getBooleanContents(Op1.getValueType()); 1551 if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent && 1552 !(BoolContents == TargetLowering::ZeroOrOneBooleanContent && 1553 Op1.getValueType().getVectorElementType() == MVT::i1)) 1554 return SDValue(); 1555 1556 // If the mask and the type are different sizes, unroll the vector op. This 1557 // can occur when getSetCCResultType returns something that is different in 1558 // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. 1559 if (VT.getSizeInBits() != Op1.getValueSizeInBits()) 1560 return SDValue(); 1561 1562 // Bitcast the operands to be the same type as the mask. 1563 // This is needed when we select between FP types because 1564 // the mask is a vector of integers. 
1565 Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1); 1566 Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2); 1567 1568 SDValue NotMask = DAG.getNOT(DL, Mask, VT); 1569 1570 Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask); 1571 Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask); 1572 SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2); 1573 return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val); 1574 } 1575 1576 SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) { 1577 // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which 1578 // do not support it natively. 1579 SDLoc DL(Node); 1580 1581 SDValue Mask = Node->getOperand(0); 1582 SDValue Op1 = Node->getOperand(1); 1583 SDValue Op2 = Node->getOperand(2); 1584 SDValue EVL = Node->getOperand(3); 1585 1586 EVT VT = Mask.getValueType(); 1587 1588 // If we can't even use the basic vector operations of 1589 // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op. 1590 if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand || 1591 TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand || 1592 TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand) 1593 return SDValue(); 1594 1595 // This operation also isn't safe when the operands aren't also booleans. 1596 if (Op1.getValueType().getVectorElementType() != MVT::i1) 1597 return SDValue(); 1598 1599 SDValue Ones = DAG.getAllOnesConstant(DL, VT); 1600 SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL); 1601 1602 Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL); 1603 Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL); 1604 return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL); 1605 } 1606 1607 SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) { 1608 // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector 1609 // indices less than the EVL/pivot are true. Combine that with the original 1610 // mask for a full-length mask. 
Use a full-length VSELECT to select between 1611 // the true and false values. 1612 SDLoc DL(Node); 1613 1614 SDValue Mask = Node->getOperand(0); 1615 SDValue Op1 = Node->getOperand(1); 1616 SDValue Op2 = Node->getOperand(2); 1617 SDValue EVL = Node->getOperand(3); 1618 1619 EVT MaskVT = Mask.getValueType(); 1620 bool IsFixedLen = MaskVT.isFixedLengthVector(); 1621 1622 EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(), 1623 MaskVT.getVectorElementCount()); 1624 1625 // If we can't construct the EVL mask efficiently, it's better to unroll. 1626 if ((IsFixedLen && 1627 !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) || 1628 (!IsFixedLen && 1629 (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) || 1630 !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT)))) 1631 return SDValue(); 1632 1633 // If using a SETCC would result in a different type than the mask type, 1634 // unroll. 1635 if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), 1636 EVLVecVT) != MaskVT) 1637 return SDValue(); 1638 1639 SDValue StepVec = DAG.getStepVector(DL, EVLVecVT); 1640 SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL); 1641 SDValue EVLMask = 1642 DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT); 1643 1644 SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask); 1645 return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2); 1646 } 1647 1648 SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) { 1649 // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB. 1650 EVT VT = Node->getValueType(0); 1651 1652 unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? 
ISD::VP_SDIV : ISD::VP_UDIV; 1653 1654 if (!TLI.isOperationLegalOrCustom(DivOpc, VT) || 1655 !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) || 1656 !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT)) 1657 return SDValue(); 1658 1659 SDLoc DL(Node); 1660 1661 SDValue Dividend = Node->getOperand(0); 1662 SDValue Divisor = Node->getOperand(1); 1663 SDValue Mask = Node->getOperand(2); 1664 SDValue EVL = Node->getOperand(3); 1665 1666 // X % Y -> X-X/Y*Y 1667 SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL); 1668 SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL); 1669 return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL); 1670 } 1671 1672 SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) { 1673 EVT VT = Node->getValueType(0); 1674 EVT IntVT = VT.changeVectorElementTypeToInteger(); 1675 1676 if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT)) 1677 return SDValue(); 1678 1679 SDValue Mask = Node->getOperand(1); 1680 SDValue EVL = Node->getOperand(2); 1681 1682 SDLoc DL(Node); 1683 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0)); 1684 SDValue SignMask = DAG.getConstant( 1685 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT); 1686 SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL); 1687 return DAG.getNode(ISD::BITCAST, DL, VT, Xor); 1688 } 1689 1690 SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) { 1691 EVT VT = Node->getValueType(0); 1692 EVT IntVT = VT.changeVectorElementTypeToInteger(); 1693 1694 if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT)) 1695 return SDValue(); 1696 1697 SDValue Mask = Node->getOperand(1); 1698 SDValue EVL = Node->getOperand(2); 1699 1700 SDLoc DL(Node); 1701 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0)); 1702 SDValue ClearSignMask = DAG.getConstant( 1703 APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT); 1704 SDValue ClearSign = 1705 DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, 
ClearSignMask, Mask, EVL); 1706 return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign); 1707 } 1708 1709 SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) { 1710 EVT VT = Node->getValueType(0); 1711 1712 if (VT != Node->getOperand(1).getValueType()) 1713 return SDValue(); 1714 1715 EVT IntVT = VT.changeVectorElementTypeToInteger(); 1716 if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) || 1717 !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT)) 1718 return SDValue(); 1719 1720 SDValue Mask = Node->getOperand(2); 1721 SDValue EVL = Node->getOperand(3); 1722 1723 SDLoc DL(Node); 1724 SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0)); 1725 SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1)); 1726 1727 SDValue SignMask = DAG.getConstant( 1728 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT); 1729 SDValue SignBit = 1730 DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL); 1731 1732 SDValue ClearSignMask = DAG.getConstant( 1733 APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT); 1734 SDValue ClearedSign = 1735 DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL); 1736 1737 SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit, 1738 Mask, EVL, SDNodeFlags::Disjoint); 1739 1740 return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign); 1741 } 1742 1743 void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node, 1744 SmallVectorImpl<SDValue> &Results) { 1745 // Attempt to expand using TargetLowering. 1746 SDValue Result, Chain; 1747 if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) { 1748 Results.push_back(Result); 1749 if (Node->isStrictFPOpcode()) 1750 Results.push_back(Chain); 1751 return; 1752 } 1753 1754 // Otherwise go ahead and unroll. 
1755 if (Node->isStrictFPOpcode()) { 1756 UnrollStrictFPOp(Node, Results); 1757 return; 1758 } 1759 1760 Results.push_back(DAG.UnrollVectorOp(Node)); 1761 } 1762 1763 void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node, 1764 SmallVectorImpl<SDValue> &Results) { 1765 bool IsStrict = Node->isStrictFPOpcode(); 1766 unsigned OpNo = IsStrict ? 1 : 0; 1767 SDValue Src = Node->getOperand(OpNo); 1768 EVT SrcVT = Src.getValueType(); 1769 EVT DstVT = Node->getValueType(0); 1770 SDLoc DL(Node); 1771 1772 // Attempt to expand using TargetLowering. 1773 SDValue Result; 1774 SDValue Chain; 1775 if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) { 1776 Results.push_back(Result); 1777 if (IsStrict) 1778 Results.push_back(Chain); 1779 return; 1780 } 1781 1782 // Make sure that the SINT_TO_FP and SRL instructions are available. 1783 if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == 1784 TargetLowering::Expand) || 1785 (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) == 1786 TargetLowering::Expand)) || 1787 TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) { 1788 if (IsStrict) { 1789 UnrollStrictFPOp(Node, Results); 1790 return; 1791 } 1792 1793 Results.push_back(DAG.UnrollVectorOp(Node)); 1794 return; 1795 } 1796 1797 unsigned BW = SrcVT.getScalarSizeInBits(); 1798 assert((BW == 64 || BW == 32) && 1799 "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide"); 1800 1801 // If STRICT_/FMUL is not supported by the target (in case of f16) replace the 1802 // UINT_TO_FP with a larger float and round to the smaller type 1803 if ((!IsStrict && !TLI.isOperationLegalOrCustom(ISD::FMUL, DstVT)) || 1804 (IsStrict && !TLI.isOperationLegalOrCustom(ISD::STRICT_FMUL, DstVT))) { 1805 EVT FPVT = BW == 32 ? 
MVT::f32 : MVT::f64; 1806 SDValue UIToFP; 1807 SDValue Result; 1808 SDValue TargetZero = DAG.getIntPtrConstant(0, DL, /*isTarget=*/true); 1809 EVT FloatVecVT = SrcVT.changeVectorElementType(FPVT); 1810 if (IsStrict) { 1811 UIToFP = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {FloatVecVT, MVT::Other}, 1812 {Node->getOperand(0), Src}); 1813 Result = DAG.getNode(ISD::STRICT_FP_ROUND, DL, {DstVT, MVT::Other}, 1814 {Node->getOperand(0), UIToFP, TargetZero}); 1815 Results.push_back(Result); 1816 Results.push_back(Result.getValue(1)); 1817 } else { 1818 UIToFP = DAG.getNode(ISD::UINT_TO_FP, DL, FloatVecVT, Src); 1819 Result = DAG.getNode(ISD::FP_ROUND, DL, DstVT, UIToFP, TargetZero); 1820 Results.push_back(Result); 1821 } 1822 1823 return; 1824 } 1825 1826 SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT); 1827 1828 // Constants to clear the upper part of the word. 1829 // Notice that we can also use SHL+SHR, but using a constant is slightly 1830 // faster on x86. 1831 uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF; 1832 SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT); 1833 1834 // Two to the power of half-word-size. 1835 SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT); 1836 1837 // Clear upper part of LO, lower HI 1838 SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord); 1839 SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask); 1840 1841 if (IsStrict) { 1842 // Convert hi and lo to floats 1843 // Convert the hi part back to the upper values 1844 // TODO: Can any fast-math-flags be set on these nodes? 
1845 SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other}, 1846 {Node->getOperand(0), HI}); 1847 fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other}, 1848 {fHI.getValue(1), fHI, TWOHW}); 1849 SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other}, 1850 {Node->getOperand(0), LO}); 1851 1852 SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1), 1853 fLO.getValue(1)); 1854 1855 // Add the two halves 1856 SDValue Result = 1857 DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO}); 1858 1859 Results.push_back(Result); 1860 Results.push_back(Result.getValue(1)); 1861 return; 1862 } 1863 1864 // Convert hi and lo to floats 1865 // Convert the hi part back to the upper values 1866 // TODO: Can any fast-math-flags be set on these nodes? 1867 SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI); 1868 fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW); 1869 SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO); 1870 1871 // Add the two halves 1872 Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO)); 1873 } 1874 1875 SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) { 1876 EVT VT = Node->getValueType(0); 1877 EVT IntVT = VT.changeVectorElementTypeToInteger(); 1878 1879 if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT)) 1880 return SDValue(); 1881 1882 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64. 
1883 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) && 1884 !VT.isScalableVector()) 1885 return SDValue(); 1886 1887 SDLoc DL(Node); 1888 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0)); 1889 SDValue SignMask = DAG.getConstant( 1890 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT); 1891 SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask); 1892 return DAG.getNode(ISD::BITCAST, DL, VT, Xor); 1893 } 1894 1895 SDValue VectorLegalizer::ExpandFABS(SDNode *Node) { 1896 EVT VT = Node->getValueType(0); 1897 EVT IntVT = VT.changeVectorElementTypeToInteger(); 1898 1899 if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT)) 1900 return SDValue(); 1901 1902 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64. 1903 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) && 1904 !VT.isScalableVector()) 1905 return SDValue(); 1906 1907 SDLoc DL(Node); 1908 SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0)); 1909 SDValue ClearSignMask = DAG.getConstant( 1910 APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT); 1911 SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask); 1912 return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign); 1913 } 1914 1915 SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) { 1916 EVT VT = Node->getValueType(0); 1917 EVT IntVT = VT.changeVectorElementTypeToInteger(); 1918 1919 if (VT != Node->getOperand(1).getValueType() || 1920 !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) || 1921 !TLI.isOperationLegalOrCustom(ISD::OR, IntVT)) 1922 return SDValue(); 1923 1924 // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64. 
1925 if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) && 1926 !VT.isScalableVector()) 1927 return SDValue(); 1928 1929 SDLoc DL(Node); 1930 SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0)); 1931 SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1)); 1932 1933 SDValue SignMask = DAG.getConstant( 1934 APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT); 1935 SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask); 1936 1937 SDValue ClearSignMask = DAG.getConstant( 1938 APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT); 1939 SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask); 1940 1941 SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit, 1942 SDNodeFlags::Disjoint); 1943 1944 return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign); 1945 } 1946 1947 void VectorLegalizer::ExpandFSUB(SDNode *Node, 1948 SmallVectorImpl<SDValue> &Results) { 1949 // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal, 1950 // we can defer this to operation legalization where it will be lowered as 1951 // a+(-b). 1952 EVT VT = Node->getValueType(0); 1953 if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) && 1954 TLI.isOperationLegalOrCustom(ISD::FADD, VT)) 1955 return; // Defer to LegalizeDAG 1956 1957 if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) { 1958 Results.push_back(Expanded); 1959 return; 1960 } 1961 1962 SDValue Tmp = DAG.UnrollVectorOp(Node); 1963 Results.push_back(Tmp); 1964 } 1965 1966 void VectorLegalizer::ExpandSETCC(SDNode *Node, 1967 SmallVectorImpl<SDValue> &Results) { 1968 bool NeedInvert = false; 1969 bool IsVP = Node->getOpcode() == ISD::VP_SETCC; 1970 bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC || 1971 Node->getOpcode() == ISD::STRICT_FSETCCS; 1972 bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS; 1973 unsigned Offset = IsStrict ? 1 : 0; 1974 1975 SDValue Chain = IsStrict ? 
Node->getOperand(0) : SDValue(); 1976 SDValue LHS = Node->getOperand(0 + Offset); 1977 SDValue RHS = Node->getOperand(1 + Offset); 1978 SDValue CC = Node->getOperand(2 + Offset); 1979 1980 MVT OpVT = LHS.getSimpleValueType(); 1981 ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get(); 1982 1983 if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) { 1984 if (IsStrict) { 1985 UnrollStrictFPOp(Node, Results); 1986 return; 1987 } 1988 Results.push_back(UnrollVSETCC(Node)); 1989 return; 1990 } 1991 1992 SDValue Mask, EVL; 1993 if (IsVP) { 1994 Mask = Node->getOperand(3 + Offset); 1995 EVL = Node->getOperand(4 + Offset); 1996 } 1997 1998 SDLoc dl(Node); 1999 bool Legalized = 2000 TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask, 2001 EVL, NeedInvert, dl, Chain, IsSignaling); 2002 2003 if (Legalized) { 2004 // If we expanded the SETCC by swapping LHS and RHS, or by inverting the 2005 // condition code, create a new SETCC node. 2006 if (CC.getNode()) { 2007 if (IsStrict) { 2008 LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(), 2009 {Chain, LHS, RHS, CC}, Node->getFlags()); 2010 Chain = LHS.getValue(1); 2011 } else if (IsVP) { 2012 LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0), 2013 {LHS, RHS, CC, Mask, EVL}, Node->getFlags()); 2014 } else { 2015 LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC, 2016 Node->getFlags()); 2017 } 2018 } 2019 2020 // If we expanded the SETCC by inverting the condition code, then wrap 2021 // the existing SETCC in a NOT to restore the intended condition. 2022 if (NeedInvert) { 2023 if (!IsVP) 2024 LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0)); 2025 else 2026 LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0)); 2027 } 2028 } else { 2029 assert(!IsStrict && "Don't know how to expand for strict nodes."); 2030 2031 // Otherwise, SETCC for the given comparison type must be completely 2032 // illegal; expand it into a SELECT_CC. 
2033 EVT VT = Node->getValueType(0); 2034 LHS = 2035 DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS, 2036 DAG.getBoolConstant(true, dl, VT, LHS.getValueType()), 2037 DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC); 2038 LHS->setFlags(Node->getFlags()); 2039 } 2040 2041 Results.push_back(LHS); 2042 if (IsStrict) 2043 Results.push_back(Chain); 2044 } 2045 2046 void VectorLegalizer::ExpandUADDSUBO(SDNode *Node, 2047 SmallVectorImpl<SDValue> &Results) { 2048 SDValue Result, Overflow; 2049 TLI.expandUADDSUBO(Node, Result, Overflow, DAG); 2050 Results.push_back(Result); 2051 Results.push_back(Overflow); 2052 } 2053 2054 void VectorLegalizer::ExpandSADDSUBO(SDNode *Node, 2055 SmallVectorImpl<SDValue> &Results) { 2056 SDValue Result, Overflow; 2057 TLI.expandSADDSUBO(Node, Result, Overflow, DAG); 2058 Results.push_back(Result); 2059 Results.push_back(Overflow); 2060 } 2061 2062 void VectorLegalizer::ExpandMULO(SDNode *Node, 2063 SmallVectorImpl<SDValue> &Results) { 2064 SDValue Result, Overflow; 2065 if (!TLI.expandMULO(Node, Result, Overflow, DAG)) 2066 std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node); 2067 2068 Results.push_back(Result); 2069 Results.push_back(Overflow); 2070 } 2071 2072 void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node, 2073 SmallVectorImpl<SDValue> &Results) { 2074 SDNode *N = Node; 2075 if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N), 2076 N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG)) 2077 Results.push_back(Expanded); 2078 } 2079 2080 void VectorLegalizer::ExpandStrictFPOp(SDNode *Node, 2081 SmallVectorImpl<SDValue> &Results) { 2082 if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) { 2083 ExpandUINT_TO_FLOAT(Node, Results); 2084 return; 2085 } 2086 if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) { 2087 ExpandFP_TO_UINT(Node, Results); 2088 return; 2089 } 2090 2091 if (Node->getOpcode() == ISD::STRICT_FSETCC || 2092 Node->getOpcode() == ISD::STRICT_FSETCCS) { 2093 
ExpandSETCC(Node, Results); 2094 return; 2095 } 2096 2097 UnrollStrictFPOp(Node, Results); 2098 } 2099 2100 void VectorLegalizer::ExpandREM(SDNode *Node, 2101 SmallVectorImpl<SDValue> &Results) { 2102 assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) && 2103 "Expected REM node"); 2104 2105 SDValue Result; 2106 if (!TLI.expandREM(Node, Result, DAG)) 2107 Result = DAG.UnrollVectorOp(Node); 2108 Results.push_back(Result); 2109 } 2110 2111 // Try to expand libm nodes into vector math routine calls. Callers provide the 2112 // LibFunc equivalent of the passed in Node, which is used to lookup mappings 2113 // within TargetLibraryInfo. The only mappings considered are those where the 2114 // result and all operands are the same vector type. While predicated nodes are 2115 // not supported, we will emit calls to masked routines by passing in an all 2116 // true mask. 2117 bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC, 2118 SmallVectorImpl<SDValue> &Results) { 2119 // Chain must be propagated but currently strict fp operations are down 2120 // converted to their none strict counterpart. 2121 assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!"); 2122 2123 const char *LCName = TLI.getLibcallName(LC); 2124 if (!LCName) 2125 return false; 2126 LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n"); 2127 2128 EVT VT = Node->getValueType(0); 2129 ElementCount VL = VT.getVectorElementCount(); 2130 2131 // Lookup a vector function equivalent to the specified libcall. Prefer 2132 // unmasked variants but we will generate a mask if need be. 
2133 const TargetLibraryInfo &TLibInfo = DAG.getLibInfo(); 2134 const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, false); 2135 if (!VD) 2136 VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true); 2137 if (!VD) 2138 return false; 2139 2140 LLVMContext *Ctx = DAG.getContext(); 2141 Type *Ty = VT.getTypeForEVT(*Ctx); 2142 Type *ScalarTy = Ty->getScalarType(); 2143 2144 // Construct a scalar function type based on Node's operands. 2145 SmallVector<Type *, 8> ArgTys; 2146 for (unsigned i = 0; i < Node->getNumOperands(); ++i) { 2147 assert(Node->getOperand(i).getValueType() == VT && 2148 "Expected matching vector types!"); 2149 ArgTys.push_back(ScalarTy); 2150 } 2151 FunctionType *ScalarFTy = FunctionType::get(ScalarTy, ArgTys, false); 2152 2153 // Generate call information for the vector function. 2154 const std::string MangledName = VD->getVectorFunctionABIVariantString(); 2155 auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy); 2156 if (!OptVFInfo) 2157 return false; 2158 2159 LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName() 2160 << "\n"); 2161 2162 // Sanity check just in case OptVFInfo has unexpected parameters. 2163 if (OptVFInfo->Shape.Parameters.size() != 2164 Node->getNumOperands() + VD->isMasked()) 2165 return false; 2166 2167 // Collect vector call operands. 2168 2169 SDLoc DL(Node); 2170 TargetLowering::ArgListTy Args; 2171 TargetLowering::ArgListEntry Entry; 2172 Entry.IsSExt = false; 2173 Entry.IsZExt = false; 2174 2175 unsigned OpNum = 0; 2176 for (auto &VFParam : OptVFInfo->Shape.Parameters) { 2177 if (VFParam.ParamKind == VFParamKind::GlobalPredicate) { 2178 EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT); 2179 Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT); 2180 Entry.Ty = MaskVT.getTypeForEVT(*Ctx); 2181 Args.push_back(Entry); 2182 continue; 2183 } 2184 2185 // Only vector operands are supported. 
2186 if (VFParam.ParamKind != VFParamKind::Vector) 2187 return false; 2188 2189 Entry.Node = Node->getOperand(OpNum++); 2190 Entry.Ty = Ty; 2191 Args.push_back(Entry); 2192 } 2193 2194 // Emit a call to the vector function. 2195 SDValue Callee = DAG.getExternalSymbol(VD->getVectorFnName().data(), 2196 TLI.getPointerTy(DAG.getDataLayout())); 2197 TargetLowering::CallLoweringInfo CLI(DAG); 2198 CLI.setDebugLoc(DL) 2199 .setChain(DAG.getEntryNode()) 2200 .setLibCallee(CallingConv::C, Ty, Callee, std::move(Args)); 2201 2202 std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); 2203 Results.push_back(CallResult.first); 2204 return true; 2205 } 2206 2207 /// Try to expand the node to a vector libcall based on the result type. 2208 bool VectorLegalizer::tryExpandVecMathCall( 2209 SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, 2210 RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128, 2211 RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) { 2212 RTLIB::Libcall LC = RTLIB::getFPLibCall( 2213 Node->getValueType(0).getVectorElementType(), Call_F32, Call_F64, 2214 Call_F80, Call_F128, Call_PPCF128); 2215 2216 if (LC == RTLIB::UNKNOWN_LIBCALL) 2217 return false; 2218 2219 return tryExpandVecMathCall(Node, LC, Results); 2220 } 2221 2222 void VectorLegalizer::UnrollStrictFPOp(SDNode *Node, 2223 SmallVectorImpl<SDValue> &Results) { 2224 EVT VT = Node->getValueType(0); 2225 EVT EltVT = VT.getVectorElementType(); 2226 unsigned NumElems = VT.getVectorNumElements(); 2227 unsigned NumOpers = Node->getNumOperands(); 2228 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2229 2230 EVT TmpEltVT = EltVT; 2231 if (Node->getOpcode() == ISD::STRICT_FSETCC || 2232 Node->getOpcode() == ISD::STRICT_FSETCCS) 2233 TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(), 2234 *DAG.getContext(), TmpEltVT); 2235 2236 EVT ValueVTs[] = {TmpEltVT, MVT::Other}; 2237 SDValue Chain = Node->getOperand(0); 2238 SDLoc dl(Node); 2239 2240 SmallVector<SDValue, 32> 
OpValues; 2241 SmallVector<SDValue, 32> OpChains; 2242 for (unsigned i = 0; i < NumElems; ++i) { 2243 SmallVector<SDValue, 4> Opers; 2244 SDValue Idx = DAG.getVectorIdxConstant(i, dl); 2245 2246 // The Chain is the first operand. 2247 Opers.push_back(Chain); 2248 2249 // Now process the remaining operands. 2250 for (unsigned j = 1; j < NumOpers; ++j) { 2251 SDValue Oper = Node->getOperand(j); 2252 EVT OperVT = Oper.getValueType(); 2253 2254 if (OperVT.isVector()) 2255 Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 2256 OperVT.getVectorElementType(), Oper, Idx); 2257 2258 Opers.push_back(Oper); 2259 } 2260 2261 SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers); 2262 SDValue ScalarResult = ScalarOp.getValue(0); 2263 SDValue ScalarChain = ScalarOp.getValue(1); 2264 2265 if (Node->getOpcode() == ISD::STRICT_FSETCC || 2266 Node->getOpcode() == ISD::STRICT_FSETCCS) 2267 ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult, 2268 DAG.getAllOnesConstant(dl, EltVT), 2269 DAG.getConstant(0, dl, EltVT)); 2270 2271 OpValues.push_back(ScalarResult); 2272 OpChains.push_back(ScalarChain); 2273 } 2274 2275 SDValue Result = DAG.getBuildVector(VT, dl, OpValues); 2276 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); 2277 2278 Results.push_back(Result); 2279 Results.push_back(NewChain); 2280 } 2281 2282 SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) { 2283 EVT VT = Node->getValueType(0); 2284 unsigned NumElems = VT.getVectorNumElements(); 2285 EVT EltVT = VT.getVectorElementType(); 2286 SDValue LHS = Node->getOperand(0); 2287 SDValue RHS = Node->getOperand(1); 2288 SDValue CC = Node->getOperand(2); 2289 EVT TmpEltVT = LHS.getValueType().getVectorElementType(); 2290 SDLoc dl(Node); 2291 SmallVector<SDValue, 8> Ops(NumElems); 2292 for (unsigned i = 0; i < NumElems; ++i) { 2293 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, 2294 DAG.getVectorIdxConstant(i, dl)); 2295 SDValue RHSElem = 
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, 2296 DAG.getVectorIdxConstant(i, dl)); 2297 // FIXME: We should use i1 setcc + boolext here, but it causes regressions. 2298 Ops[i] = DAG.getNode(ISD::SETCC, dl, 2299 TLI.getSetCCResultType(DAG.getDataLayout(), 2300 *DAG.getContext(), TmpEltVT), 2301 LHSElem, RHSElem, CC); 2302 Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], 2303 DAG.getBoolConstant(true, dl, EltVT, VT), 2304 DAG.getConstant(0, dl, EltVT)); 2305 } 2306 return DAG.getBuildVector(VT, dl, Ops); 2307 } 2308 2309 bool SelectionDAG::LegalizeVectors() { 2310 return VectorLegalizer(*this).Run(); 2311 } 2312