//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file performs vector type splitting and scalarization for LegalizeTypes.
// Scalarization is the act of changing a computation in an illegal one-element
// vector type to be a computation in its scalar element type. For example,
// implementing <1 x f32> arithmetic in a scalar f32 register. This is needed
// as a base case when scalarizing vector arithmetic like <4 x f32>, which
// eventually decomposes to scalars if the target doesn't support v4f32 or v2f32
// types.
// Splitting is the act of changing a computation in an invalid vector type to
// be a computation in two vectors of half the size. For example, implementing
// <128 x f32> operations in terms of two <64 x f32> operations.
//
//===----------------------------------------------------------------------===//

#include "LegalizeTypes.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
#include <numeric>

using namespace llvm;

#define DEBUG_TYPE "legalize-types"

//===----------------------------------------------------------------------===//
// Result Vector Scalarization: <1 x ty> -> ty.
39 //===----------------------------------------------------------------------===// 40 41 void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { 42 LLVM_DEBUG(dbgs() << "Scalarize node result " << ResNo << ": "; 43 N->dump(&DAG)); 44 SDValue R = SDValue(); 45 46 switch (N->getOpcode()) { 47 default: 48 #ifndef NDEBUG 49 dbgs() << "ScalarizeVectorResult #" << ResNo << ": "; 50 N->dump(&DAG); 51 dbgs() << "\n"; 52 #endif 53 report_fatal_error("Do not know how to scalarize the result of this " 54 "operator!\n"); 55 56 case ISD::MERGE_VALUES: R = ScalarizeVecRes_MERGE_VALUES(N, ResNo);break; 57 case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break; 58 case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break; 59 case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break; 60 case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break; 61 case ISD::AssertZext: 62 case ISD::AssertSext: 63 case ISD::FPOWI: 64 R = ScalarizeVecRes_UnaryOpWithExtraInput(N); 65 break; 66 case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break; 67 case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break; 68 case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break; 69 case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break; 70 case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break; 71 case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break; 72 case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break; 73 case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break; 74 case ISD::UNDEF: R = ScalarizeVecRes_UNDEF(N); break; 75 case ISD::VECTOR_SHUFFLE: R = ScalarizeVecRes_VECTOR_SHUFFLE(N); break; 76 case ISD::IS_FPCLASS: R = ScalarizeVecRes_IS_FPCLASS(N); break; 77 case ISD::ANY_EXTEND_VECTOR_INREG: 78 case ISD::SIGN_EXTEND_VECTOR_INREG: 79 case ISD::ZERO_EXTEND_VECTOR_INREG: 80 R = ScalarizeVecRes_VecInregOp(N); 81 break; 82 case ISD::ABS: 83 case ISD::ANY_EXTEND: 84 case ISD::BITREVERSE: 85 case ISD::BSWAP: 86 case ISD::CTLZ: 87 case ISD::CTLZ_ZERO_UNDEF: 88 case ISD::CTPOP: 89 case ISD::CTTZ: 90 case ISD::CTTZ_ZERO_UNDEF: 91 case ISD::FABS: 92 case ISD::FACOS: 93 case ISD::FASIN: 94 case ISD::FATAN: 95 case ISD::FCEIL: 96 case ISD::FCOS: 97 case ISD::FCOSH: 98 case ISD::FEXP: 99 case ISD::FEXP2: 100 case ISD::FEXP10: 101 case ISD::FFLOOR: 102 case ISD::FLOG: 103 case ISD::FLOG10: 104 case ISD::FLOG2: 105 case ISD::FNEARBYINT: 106 case ISD::FNEG: 107 case ISD::FREEZE: 108 case ISD::ARITH_FENCE: 109 case ISD::FP_EXTEND: 110 case ISD::FP_TO_SINT: 111 case ISD::FP_TO_UINT: 112 case ISD::FRINT: 113 case ISD::LRINT: 114 case ISD::LLRINT: 115 case ISD::FROUND: 116 case ISD::FROUNDEVEN: 117 case ISD::LROUND: 118 case ISD::LLROUND: 119 case ISD::FSIN: 120 case ISD::FSINH: 121 case ISD::FSQRT: 122 case ISD::FTAN: 123 case ISD::FTANH: 124 case ISD::FTRUNC: 125 case ISD::SIGN_EXTEND: 126 case ISD::SINT_TO_FP: 127 case ISD::TRUNCATE: 128 case ISD::UINT_TO_FP: 129 case ISD::ZERO_EXTEND: 130 case ISD::FCANONICALIZE: 131 R = ScalarizeVecRes_UnaryOp(N); 132 break; 133 case ISD::ADDRSPACECAST: 134 R = ScalarizeVecRes_ADDRSPACECAST(N); 135 break; 136 case ISD::FFREXP: 137 case ISD::FSINCOS: 138 R = ScalarizeVecRes_UnaryOpWithTwoResults(N, ResNo); 139 break; 140 case ISD::ADD: 141 case ISD::AND: 142 case ISD::AVGCEILS: 143 case ISD::AVGCEILU: 144 case ISD::AVGFLOORS: 145 case ISD::AVGFLOORU: 146 case ISD::FADD: 147 case ISD::FCOPYSIGN: 148 case ISD::FDIV: 149 case ISD::FMUL: 150 case ISD::FMINNUM: 151 case ISD::FMAXNUM: 152 case 
ISD::FMINNUM_IEEE: 153 case ISD::FMAXNUM_IEEE: 154 case ISD::FMINIMUM: 155 case ISD::FMAXIMUM: 156 case ISD::FMINIMUMNUM: 157 case ISD::FMAXIMUMNUM: 158 case ISD::FLDEXP: 159 case ISD::ABDS: 160 case ISD::ABDU: 161 case ISD::SMIN: 162 case ISD::SMAX: 163 case ISD::UMIN: 164 case ISD::UMAX: 165 166 case ISD::SADDSAT: 167 case ISD::UADDSAT: 168 case ISD::SSUBSAT: 169 case ISD::USUBSAT: 170 case ISD::SSHLSAT: 171 case ISD::USHLSAT: 172 173 case ISD::FPOW: 174 case ISD::FATAN2: 175 case ISD::FREM: 176 case ISD::FSUB: 177 case ISD::MUL: 178 case ISD::MULHS: 179 case ISD::MULHU: 180 case ISD::OR: 181 case ISD::SDIV: 182 case ISD::SREM: 183 case ISD::SUB: 184 case ISD::UDIV: 185 case ISD::UREM: 186 case ISD::XOR: 187 case ISD::SHL: 188 case ISD::SRA: 189 case ISD::SRL: 190 case ISD::ROTL: 191 case ISD::ROTR: 192 R = ScalarizeVecRes_BinOp(N); 193 break; 194 195 case ISD::SCMP: 196 case ISD::UCMP: 197 R = ScalarizeVecRes_CMP(N); 198 break; 199 200 case ISD::FMA: 201 case ISD::FSHL: 202 case ISD::FSHR: 203 R = ScalarizeVecRes_TernaryOp(N); 204 break; 205 206 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 207 case ISD::STRICT_##DAGN: 208 #include "llvm/IR/ConstrainedOps.def" 209 R = ScalarizeVecRes_StrictFPOp(N); 210 break; 211 212 case ISD::FP_TO_UINT_SAT: 213 case ISD::FP_TO_SINT_SAT: 214 R = ScalarizeVecRes_FP_TO_XINT_SAT(N); 215 break; 216 217 case ISD::UADDO: 218 case ISD::SADDO: 219 case ISD::USUBO: 220 case ISD::SSUBO: 221 case ISD::UMULO: 222 case ISD::SMULO: 223 R = ScalarizeVecRes_OverflowOp(N, ResNo); 224 break; 225 case ISD::SMULFIX: 226 case ISD::SMULFIXSAT: 227 case ISD::UMULFIX: 228 case ISD::UMULFIXSAT: 229 case ISD::SDIVFIX: 230 case ISD::SDIVFIXSAT: 231 case ISD::UDIVFIX: 232 case ISD::UDIVFIXSAT: 233 R = ScalarizeVecRes_FIX(N); 234 break; 235 } 236 237 // If R is null, the sub-method took care of registering the result. 
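// Otherwise, record the scalarized value so that later calls to
// GetScalarizedVector for this result return it.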
238 if (R.getNode()) 239 SetScalarizedVector(SDValue(N, ResNo), R); 240 } 241 242 SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { 243 SDValue LHS = GetScalarizedVector(N->getOperand(0)); 244 SDValue RHS = GetScalarizedVector(N->getOperand(1)); 245 return DAG.getNode(N->getOpcode(), SDLoc(N), 246 LHS.getValueType(), LHS, RHS, N->getFlags()); 247 } 248 249 SDValue DAGTypeLegalizer::ScalarizeVecRes_CMP(SDNode *N) { 250 SDLoc DL(N); 251 252 SDValue LHS = N->getOperand(0); 253 SDValue RHS = N->getOperand(1); 254 if (getTypeAction(LHS.getValueType()) == 255 TargetLowering::TypeScalarizeVector) { 256 LHS = GetScalarizedVector(LHS); 257 RHS = GetScalarizedVector(RHS); 258 } else { 259 EVT VT = LHS.getValueType().getVectorElementType(); 260 LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, 261 DAG.getVectorIdxConstant(0, DL)); 262 RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, 263 DAG.getVectorIdxConstant(0, DL)); 264 } 265 266 return DAG.getNode(N->getOpcode(), SDLoc(N), 267 N->getValueType(0).getVectorElementType(), LHS, RHS); 268 } 269 270 SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { 271 SDValue Op0 = GetScalarizedVector(N->getOperand(0)); 272 SDValue Op1 = GetScalarizedVector(N->getOperand(1)); 273 SDValue Op2 = GetScalarizedVector(N->getOperand(2)); 274 return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, 275 Op2, N->getFlags()); 276 } 277 278 SDValue DAGTypeLegalizer::ScalarizeVecRes_FIX(SDNode *N) { 279 SDValue Op0 = GetScalarizedVector(N->getOperand(0)); 280 SDValue Op1 = GetScalarizedVector(N->getOperand(1)); 281 SDValue Op2 = N->getOperand(2); 282 return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, 283 Op2, N->getFlags()); 284 } 285 286 SDValue 287 DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithTwoResults(SDNode *N, 288 unsigned ResNo) { 289 assert(N->getValueType(0).getVectorNumElements() == 1 && 290 "Unexpected vector type!"); 291 SDValue Elt = GetScalarizedVector(N->getOperand(0)); 292 293 EVT VT0 = N->getValueType(0); 294 EVT VT1 = N->getValueType(1); 295 SDLoc dl(N); 296 297 SDNode *ScalarNode = 298 DAG.getNode(N->getOpcode(), dl, 299 {VT0.getScalarType(), VT1.getScalarType()}, Elt) 300 .getNode(); 301 302 // Replace the other vector result not being explicitly scalarized here. 303 unsigned OtherNo = 1 - ResNo; 304 EVT OtherVT = N->getValueType(OtherNo); 305 if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) { 306 SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo)); 307 } else { 308 SDValue OtherVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OtherVT, 309 SDValue(ScalarNode, OtherNo)); 310 ReplaceValueWith(SDValue(N, OtherNo), OtherVal); 311 } 312 313 return SDValue(ScalarNode, ResNo); 314 } 315 316 SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { 317 EVT VT = N->getValueType(0).getVectorElementType(); 318 unsigned NumOpers = N->getNumOperands(); 319 SDValue Chain = N->getOperand(0); 320 EVT ValueVTs[] = {VT, MVT::Other}; 321 SDLoc dl(N); 322 323 SmallVector<SDValue, 4> Opers(NumOpers); 324 325 // The Chain is the first operand. 326 Opers[0] = Chain; 327 328 // Now process the remaining operands. 
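// Vector operands are reduced to their single scalar element, either by
// reusing the already-scalarized value or by extracting element zero; scalar
// operands are passed through unchanged.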
329 for (unsigned i = 1; i < NumOpers; ++i) { 330 SDValue Oper = N->getOperand(i); 331 EVT OperVT = Oper.getValueType(); 332 333 if (OperVT.isVector()) { 334 if (getTypeAction(OperVT) == TargetLowering::TypeScalarizeVector) 335 Oper = GetScalarizedVector(Oper); 336 else 337 Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 338 OperVT.getVectorElementType(), Oper, 339 DAG.getVectorIdxConstant(0, dl)); 340 } 341 342 Opers[i] = Oper; 343 } 344 345 SDValue Result = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(ValueVTs), 346 Opers, N->getFlags()); 347 348 // Legalize the chain result - switch anything that used the old chain to 349 // use the new one. 350 ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); 351 return Result; 352 } 353 354 SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N, 355 unsigned ResNo) { 356 SDLoc DL(N); 357 EVT ResVT = N->getValueType(0); 358 EVT OvVT = N->getValueType(1); 359 360 SDValue ScalarLHS, ScalarRHS; 361 if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) { 362 ScalarLHS = GetScalarizedVector(N->getOperand(0)); 363 ScalarRHS = GetScalarizedVector(N->getOperand(1)); 364 } else { 365 SmallVector<SDValue, 1> ElemsLHS, ElemsRHS; 366 DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS); 367 DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS); 368 ScalarLHS = ElemsLHS[0]; 369 ScalarRHS = ElemsRHS[0]; 370 } 371 372 SDVTList ScalarVTs = DAG.getVTList( 373 ResVT.getVectorElementType(), OvVT.getVectorElementType()); 374 SDNode *ScalarNode = DAG.getNode( 375 N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode(); 376 ScalarNode->setFlags(N->getFlags()); 377 378 // Replace the other vector result not being explicitly scalarized here. 379 unsigned OtherNo = 1 - ResNo; 380 EVT OtherVT = N->getValueType(OtherNo); 381 if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) { 382 SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo)); 383 } else { 384 SDValue OtherVal = DAG.getNode( 385 ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo)); 386 ReplaceValueWith(SDValue(N, OtherNo), OtherVal); 387 } 388 389 return SDValue(ScalarNode, ResNo); 390 } 391 392 SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, 393 unsigned ResNo) { 394 SDValue Op = DisintegrateMERGE_VALUES(N, ResNo); 395 return GetScalarizedVector(Op); 396 } 397 398 SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { 399 SDValue Op = N->getOperand(0); 400 if (Op.getValueType().isVector() 401 && Op.getValueType().getVectorNumElements() == 1 402 && !isSimpleLegalType(Op.getValueType())) 403 Op = GetScalarizedVector(Op); 404 EVT NewVT = N->getValueType(0).getVectorElementType(); 405 return DAG.getNode(ISD::BITCAST, SDLoc(N), 406 NewVT, Op); 407 } 408 409 SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { 410 EVT EltVT = N->getValueType(0).getVectorElementType(); 411 SDValue InOp = N->getOperand(0); 412 // The BUILD_VECTOR operands may be of wider element types and 413 // we may need to truncate them back to the requested return type. 
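// Only integer operands may be implicitly wider; floating-point operands
// already have the element type and are returned unchanged.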
414 if (EltVT.isInteger()) 415 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); 416 return InOp; 417 } 418 419 SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { 420 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), 421 N->getValueType(0).getVectorElementType(), 422 N->getOperand(0), N->getOperand(1)); 423 } 424 425 SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { 426 SDLoc DL(N); 427 SDValue Op = N->getOperand(0); 428 EVT OpVT = Op.getValueType(); 429 // The result needs scalarizing, but it's not a given that the source does. 430 // See similar logic in ScalarizeVecRes_UnaryOp. 431 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { 432 Op = GetScalarizedVector(Op); 433 } else { 434 EVT VT = OpVT.getVectorElementType(); 435 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, 436 DAG.getVectorIdxConstant(0, DL)); 437 } 438 return DAG.getNode(ISD::FP_ROUND, DL, 439 N->getValueType(0).getVectorElementType(), Op, 440 N->getOperand(1)); 441 } 442 443 SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOpWithExtraInput(SDNode *N) { 444 SDValue Op = GetScalarizedVector(N->getOperand(0)); 445 return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op, 446 N->getOperand(1)); 447 } 448 449 SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { 450 // The value to insert may have a wider type than the vector element type, 451 // so be sure to truncate it to the element type if necessary. 452 SDValue Op = N->getOperand(1); 453 EVT EltVT = N->getValueType(0).getVectorElementType(); 454 if (Op.getValueType() != EltVT) 455 // FIXME: Can this happen for floating point types? 456 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op); 457 return Op; 458 } 459 460 SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { 461 assert(N->isUnindexed() && "Indexed vector load?"); 462 463 SDValue Result = DAG.getLoad( 464 ISD::UNINDEXED, N->getExtensionType(), 465 N->getValueType(0).getVectorElementType(), SDLoc(N), N->getChain(), 466 N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), 467 N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), 468 N->getOriginalAlign(), N->getMemOperand()->getFlags(), N->getAAInfo()); 469 470 // Legalize the chain result - switch anything that used the old chain to 471 // use the new one. 472 ReplaceValueWith(SDValue(N, 1), Result.getValue(1)); 473 return Result; 474 } 475 476 SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { 477 // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. 478 EVT DestVT = N->getValueType(0).getVectorElementType(); 479 SDValue Op = N->getOperand(0); 480 EVT OpVT = Op.getValueType(); 481 SDLoc DL(N); 482 // The result needs scalarizing, but it's not a given that the source does. 483 // This is a workaround for targets where it's impossible to scalarize the 484 // result of a conversion, because the source type is legal. 485 // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} 486 // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is 487 // legal and was not scalarized. 
488 // See the similar logic in ScalarizeVecRes_SETCC 489 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { 490 Op = GetScalarizedVector(Op); 491 } else { 492 EVT VT = OpVT.getVectorElementType(); 493 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, 494 DAG.getVectorIdxConstant(0, DL)); 495 } 496 return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op, N->getFlags()); 497 } 498 499 SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { 500 EVT EltVT = N->getValueType(0).getVectorElementType(); 501 EVT ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT().getVectorElementType(); 502 SDValue LHS = GetScalarizedVector(N->getOperand(0)); 503 return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT, 504 LHS, DAG.getValueType(ExtVT)); 505 } 506 507 SDValue DAGTypeLegalizer::ScalarizeVecRes_VecInregOp(SDNode *N) { 508 SDLoc DL(N); 509 SDValue Op = N->getOperand(0); 510 511 EVT OpVT = Op.getValueType(); 512 EVT OpEltVT = OpVT.getVectorElementType(); 513 EVT EltVT = N->getValueType(0).getVectorElementType(); 514 515 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { 516 Op = GetScalarizedVector(Op); 517 } else { 518 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpEltVT, Op, 519 DAG.getVectorIdxConstant(0, DL)); 520 } 521 522 switch (N->getOpcode()) { 523 case ISD::ANY_EXTEND_VECTOR_INREG: 524 return DAG.getNode(ISD::ANY_EXTEND, DL, EltVT, Op); 525 case ISD::SIGN_EXTEND_VECTOR_INREG: 526 return DAG.getNode(ISD::SIGN_EXTEND, DL, EltVT, Op); 527 case ISD::ZERO_EXTEND_VECTOR_INREG: 528 return DAG.getNode(ISD::ZERO_EXTEND, DL, EltVT, Op); 529 } 530 531 llvm_unreachable("Illegal extend_vector_inreg opcode"); 532 } 533 534 SDValue DAGTypeLegalizer::ScalarizeVecRes_ADDRSPACECAST(SDNode *N) { 535 EVT DestVT = N->getValueType(0).getVectorElementType(); 536 SDValue Op = N->getOperand(0); 537 EVT OpVT = Op.getValueType(); 538 SDLoc DL(N); 539 // The result needs scalarizing, but it's not a given that the source does. 540 // This is a workaround for targets where it's impossible to scalarize the 541 // result of a conversion, because the source type is legal. 542 // For instance, this happens on AArch64: v1i1 is illegal but v1i{8,16,32} 543 // are widened to v8i8, v4i16, and v2i32, which is legal, because v1i64 is 544 // legal and was not scalarized. 545 // See the similar logic in ScalarizeVecRes_SETCC 546 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { 547 Op = GetScalarizedVector(Op); 548 } else { 549 EVT VT = OpVT.getVectorElementType(); 550 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, 551 DAG.getVectorIdxConstant(0, DL)); 552 } 553 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N); 554 unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace(); 555 unsigned DestAS = AddrSpaceCastN->getDestAddressSpace(); 556 return DAG.getAddrSpaceCast(DL, DestVT, Op, SrcAS, DestAS); 557 } 558 559 SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { 560 // If the operand is wider than the vector element type then it is implicitly 561 // truncated. Make that explicit here. 
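// For example, scalarizing (v1i8 (scalar_to_vector (i32 X))) yields
// (i8 (truncate X)).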
562 EVT EltVT = N->getValueType(0).getVectorElementType(); 563 SDValue InOp = N->getOperand(0); 564 if (InOp.getValueType() != EltVT) 565 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); 566 return InOp; 567 } 568 569 SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { 570 SDValue Cond = N->getOperand(0); 571 EVT OpVT = Cond.getValueType(); 572 SDLoc DL(N); 573 // The vselect result and true/value operands needs scalarizing, but it's 574 // not a given that the Cond does. For instance, in AVX512 v1i1 is legal. 575 // See the similar logic in ScalarizeVecRes_SETCC 576 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { 577 Cond = GetScalarizedVector(Cond); 578 } else { 579 EVT VT = OpVT.getVectorElementType(); 580 Cond = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Cond, 581 DAG.getVectorIdxConstant(0, DL)); 582 } 583 584 SDValue LHS = GetScalarizedVector(N->getOperand(1)); 585 TargetLowering::BooleanContent ScalarBool = 586 TLI.getBooleanContents(false, false); 587 TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); 588 589 // If integer and float booleans have different contents then we can't 590 // reliably optimize in all cases. There is a full explanation for this in 591 // DAGCombiner::visitSELECT() where the same issue affects folding 592 // (select C, 0, 1) to (xor C, 1). 593 if (TLI.getBooleanContents(false, false) != 594 TLI.getBooleanContents(false, true)) { 595 // At least try the common case where the boolean is generated by a 596 // comparison. 597 if (Cond->getOpcode() == ISD::SETCC) { 598 EVT OpVT = Cond->getOperand(0).getValueType(); 599 ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); 600 VecBool = TLI.getBooleanContents(OpVT); 601 } else 602 ScalarBool = TargetLowering::UndefinedBooleanContent; 603 } 604 605 EVT CondVT = Cond.getValueType(); 606 if (ScalarBool != VecBool) { 607 switch (ScalarBool) { 608 case TargetLowering::UndefinedBooleanContent: 609 break; 610 case TargetLowering::ZeroOrOneBooleanContent: 611 assert(VecBool == TargetLowering::UndefinedBooleanContent || 612 VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); 613 // Vector read from all ones, scalar expects a single 1 so mask. 614 Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, 615 Cond, DAG.getConstant(1, SDLoc(N), CondVT)); 616 break; 617 case TargetLowering::ZeroOrNegativeOneBooleanContent: 618 assert(VecBool == TargetLowering::UndefinedBooleanContent || 619 VecBool == TargetLowering::ZeroOrOneBooleanContent); 620 // Vector reads from a one, scalar from all ones so sign extend. 
621 Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, 622 Cond, DAG.getValueType(MVT::i1)); 623 break; 624 } 625 } 626 627 // Truncate the condition if needed 628 auto BoolVT = getSetCCResultType(CondVT); 629 if (BoolVT.bitsLT(CondVT)) 630 Cond = DAG.getNode(ISD::TRUNCATE, SDLoc(N), BoolVT, Cond); 631 632 return DAG.getSelect(SDLoc(N), 633 LHS.getValueType(), Cond, LHS, 634 GetScalarizedVector(N->getOperand(2))); 635 } 636 637 SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { 638 SDValue LHS = GetScalarizedVector(N->getOperand(1)); 639 return DAG.getSelect(SDLoc(N), 640 LHS.getValueType(), N->getOperand(0), LHS, 641 GetScalarizedVector(N->getOperand(2))); 642 } 643 644 SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { 645 SDValue LHS = GetScalarizedVector(N->getOperand(2)); 646 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), 647 N->getOperand(0), N->getOperand(1), 648 LHS, GetScalarizedVector(N->getOperand(3)), 649 N->getOperand(4)); 650 } 651 652 SDValue DAGTypeLegalizer::ScalarizeVecRes_UNDEF(SDNode *N) { 653 return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); 654 } 655 656 SDValue DAGTypeLegalizer::ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N) { 657 // Figure out if the scalar is the LHS or RHS and return it. 658 SDValue Arg = N->getOperand(2).getOperand(0); 659 if (Arg.isUndef()) 660 return DAG.getUNDEF(N->getValueType(0).getVectorElementType()); 661 unsigned Op = !cast<ConstantSDNode>(Arg)->isZero(); 662 return GetScalarizedVector(N->getOperand(Op)); 663 } 664 665 SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_TO_XINT_SAT(SDNode *N) { 666 SDValue Src = N->getOperand(0); 667 EVT SrcVT = Src.getValueType(); 668 SDLoc dl(N); 669 670 // Handle case where result is scalarized but operand is not 671 if (getTypeAction(SrcVT) == TargetLowering::TypeScalarizeVector) 672 Src = GetScalarizedVector(Src); 673 else 674 Src = DAG.getNode( 675 ISD::EXTRACT_VECTOR_ELT, dl, SrcVT.getVectorElementType(), Src, 676 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); 677 678 EVT DstVT = N->getValueType(0).getVectorElementType(); 679 return DAG.getNode(N->getOpcode(), dl, DstVT, Src, N->getOperand(1)); 680 } 681 682 SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { 683 assert(N->getValueType(0).isVector() && 684 N->getOperand(0).getValueType().isVector() && 685 "Operand types must be vectors"); 686 SDValue LHS = N->getOperand(0); 687 SDValue RHS = N->getOperand(1); 688 EVT OpVT = LHS.getValueType(); 689 EVT NVT = N->getValueType(0).getVectorElementType(); 690 SDLoc DL(N); 691 692 // The result needs scalarizing, but it's not a given that the source does. 693 if (getTypeAction(OpVT) == TargetLowering::TypeScalarizeVector) { 694 LHS = GetScalarizedVector(LHS); 695 RHS = GetScalarizedVector(RHS); 696 } else { 697 EVT VT = OpVT.getVectorElementType(); 698 LHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, LHS, 699 DAG.getVectorIdxConstant(0, DL)); 700 RHS = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, RHS, 701 DAG.getVectorIdxConstant(0, DL)); 702 } 703 704 // Turn it into a scalar SETCC. 705 SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, 706 N->getOperand(2)); 707 // Vectors may have a different boolean contents to scalars. Promote the 708 // value appropriately. 
709 ISD::NodeType ExtendCode = 710 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); 711 return DAG.getNode(ExtendCode, DL, NVT, Res); 712 } 713 714 SDValue DAGTypeLegalizer::ScalarizeVecRes_IS_FPCLASS(SDNode *N) { 715 SDLoc DL(N); 716 SDValue Arg = N->getOperand(0); 717 SDValue Test = N->getOperand(1); 718 EVT ArgVT = Arg.getValueType(); 719 EVT ResultVT = N->getValueType(0).getVectorElementType(); 720 721 if (getTypeAction(ArgVT) == TargetLowering::TypeScalarizeVector) { 722 Arg = GetScalarizedVector(Arg); 723 } else { 724 EVT VT = ArgVT.getVectorElementType(); 725 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Arg, 726 DAG.getVectorIdxConstant(0, DL)); 727 } 728 729 SDValue Res = 730 DAG.getNode(ISD::IS_FPCLASS, DL, MVT::i1, {Arg, Test}, N->getFlags()); 731 // Vectors may have a different boolean contents to scalars. Promote the 732 // value appropriately. 733 ISD::NodeType ExtendCode = 734 TargetLowering::getExtendForContent(TLI.getBooleanContents(ArgVT)); 735 return DAG.getNode(ExtendCode, DL, ResultVT, Res); 736 } 737 738 //===----------------------------------------------------------------------===// 739 // Operand Vector Scalarization <1 x ty> -> ty. 740 //===----------------------------------------------------------------------===// 741 742 bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { 743 LLVM_DEBUG(dbgs() << "Scalarize node operand " << OpNo << ": "; 744 N->dump(&DAG)); 745 SDValue Res = SDValue(); 746 747 switch (N->getOpcode()) { 748 default: 749 #ifndef NDEBUG 750 dbgs() << "ScalarizeVectorOperand Op #" << OpNo << ": "; 751 N->dump(&DAG); 752 dbgs() << "\n"; 753 #endif 754 report_fatal_error("Do not know how to scalarize this operator's " 755 "operand!\n"); 756 case ISD::BITCAST: 757 Res = ScalarizeVecOp_BITCAST(N); 758 break; 759 case ISD::FAKE_USE: 760 Res = ScalarizeVecOp_FAKE_USE(N); 761 break; 762 case ISD::ANY_EXTEND: 763 case ISD::ZERO_EXTEND: 764 case ISD::SIGN_EXTEND: 765 case ISD::TRUNCATE: 766 case ISD::FP_TO_SINT: 767 case ISD::FP_TO_UINT: 768 case ISD::SINT_TO_FP: 769 case ISD::UINT_TO_FP: 770 case ISD::LROUND: 771 case ISD::LLROUND: 772 case ISD::LRINT: 773 case ISD::LLRINT: 774 Res = ScalarizeVecOp_UnaryOp(N); 775 break; 776 case ISD::STRICT_SINT_TO_FP: 777 case ISD::STRICT_UINT_TO_FP: 778 case ISD::STRICT_FP_TO_SINT: 779 case ISD::STRICT_FP_TO_UINT: 780 Res = ScalarizeVecOp_UnaryOp_StrictFP(N); 781 break; 782 case ISD::CONCAT_VECTORS: 783 Res = ScalarizeVecOp_CONCAT_VECTORS(N); 784 break; 785 case ISD::INSERT_SUBVECTOR: 786 Res = ScalarizeVecOp_INSERT_SUBVECTOR(N, OpNo); 787 break; 788 case ISD::EXTRACT_VECTOR_ELT: 789 Res = ScalarizeVecOp_EXTRACT_VECTOR_ELT(N); 790 break; 791 case ISD::VSELECT: 792 Res = ScalarizeVecOp_VSELECT(N); 793 break; 794 case ISD::SETCC: 795 Res = ScalarizeVecOp_VSETCC(N); 796 break; 797 case ISD::STORE: 798 Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo); 799 break; 800 case ISD::STRICT_FP_ROUND: 801 Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo); 802 break; 803 case ISD::FP_ROUND: 804 Res = ScalarizeVecOp_FP_ROUND(N, OpNo); 805 break; 806 case ISD::STRICT_FP_EXTEND: 807 Res = ScalarizeVecOp_STRICT_FP_EXTEND(N); 808 break; 809 case ISD::FP_EXTEND: 810 Res = ScalarizeVecOp_FP_EXTEND(N); 811 break; 812 case ISD::VECREDUCE_FADD: 813 case ISD::VECREDUCE_FMUL: 814 case ISD::VECREDUCE_ADD: 815 case ISD::VECREDUCE_MUL: 816 case ISD::VECREDUCE_AND: 817 case ISD::VECREDUCE_OR: 818 case ISD::VECREDUCE_XOR: 819 case ISD::VECREDUCE_SMAX: 820 case ISD::VECREDUCE_SMIN: 821 case 
ISD::VECREDUCE_UMAX: 822 case ISD::VECREDUCE_UMIN: 823 case ISD::VECREDUCE_FMAX: 824 case ISD::VECREDUCE_FMIN: 825 case ISD::VECREDUCE_FMAXIMUM: 826 case ISD::VECREDUCE_FMINIMUM: 827 Res = ScalarizeVecOp_VECREDUCE(N); 828 break; 829 case ISD::VECREDUCE_SEQ_FADD: 830 case ISD::VECREDUCE_SEQ_FMUL: 831 Res = ScalarizeVecOp_VECREDUCE_SEQ(N); 832 break; 833 case ISD::SCMP: 834 case ISD::UCMP: 835 Res = ScalarizeVecOp_CMP(N); 836 break; 837 } 838 839 // If the result is null, the sub-method took care of registering results etc. 840 if (!Res.getNode()) return false; 841 842 // If the result is N, the sub-method updated N in place. Tell the legalizer 843 // core about this. 844 if (Res.getNode() == N) 845 return true; 846 847 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && 848 "Invalid operand expansion"); 849 850 ReplaceValueWith(SDValue(N, 0), Res); 851 return false; 852 } 853 854 /// If the value to convert is a vector that needs to be scalarized, it must be 855 /// <1 x ty>. Convert the element instead. 856 SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { 857 SDValue Elt = GetScalarizedVector(N->getOperand(0)); 858 return DAG.getNode(ISD::BITCAST, SDLoc(N), 859 N->getValueType(0), Elt); 860 } 861 862 // Need to legalize vector operands of fake uses. Must be <1 x ty>. 863 SDValue DAGTypeLegalizer::ScalarizeVecOp_FAKE_USE(SDNode *N) { 864 assert(N->getOperand(1).getValueType().getVectorNumElements() == 1 && 865 "Fake Use: Unexpected vector type!"); 866 SDValue Elt = GetScalarizedVector(N->getOperand(1)); 867 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Elt); 868 } 869 870 /// If the input is a vector that needs to be scalarized, it must be <1 x ty>. 871 /// Do the operation on the element instead. 872 SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { 873 assert(N->getValueType(0).getVectorNumElements() == 1 && 874 "Unexpected vector type!"); 875 SDValue Elt = GetScalarizedVector(N->getOperand(0)); 876 SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), 877 N->getValueType(0).getScalarType(), Elt); 878 // Revectorize the result so the types line up with what the uses of this 879 // expression expect. 880 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op); 881 } 882 883 /// If the input is a vector that needs to be scalarized, it must be <1 x ty>. 884 /// Do the strict FP operation on the element instead. 885 SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) { 886 assert(N->getValueType(0).getVectorNumElements() == 1 && 887 "Unexpected vector type!"); 888 SDValue Elt = GetScalarizedVector(N->getOperand(1)); 889 SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), 890 { N->getValueType(0).getScalarType(), MVT::Other }, 891 { N->getOperand(0), Elt }); 892 // Legalize the chain result - switch anything that used the old chain to 893 // use the new one. 894 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 895 // Revectorize the result so the types line up with what the uses of this 896 // expression expect. 897 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); 898 899 // Do our own replacement and return SDValue() to tell the caller that we 900 // handled all replacements since caller can only handle a single result. 901 ReplaceValueWith(SDValue(N, 0), Res); 902 return SDValue(); 903 } 904 905 /// The vectors to concatenate have length one - use a BUILD_VECTOR instead. 
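/// Each <1 x ty> operand contributes its single scalar element to the result.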
906 SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { 907 SmallVector<SDValue, 8> Ops(N->getNumOperands()); 908 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) 909 Ops[i] = GetScalarizedVector(N->getOperand(i)); 910 return DAG.getBuildVector(N->getValueType(0), SDLoc(N), Ops); 911 } 912 913 /// The inserted subvector is to be scalarized - use insert vector element 914 /// instead. 915 SDValue DAGTypeLegalizer::ScalarizeVecOp_INSERT_SUBVECTOR(SDNode *N, 916 unsigned OpNo) { 917 // We should not be attempting to scalarize the containing vector 918 assert(OpNo == 1); 919 SDValue Elt = GetScalarizedVector(N->getOperand(1)); 920 SDValue ContainingVec = N->getOperand(0); 921 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), 922 ContainingVec.getValueType(), ContainingVec, Elt, 923 N->getOperand(2)); 924 } 925 926 /// If the input is a vector that needs to be scalarized, it must be <1 x ty>, 927 /// so just return the element, ignoring the index. 928 SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { 929 EVT VT = N->getValueType(0); 930 SDValue Res = GetScalarizedVector(N->getOperand(0)); 931 if (Res.getValueType() != VT) 932 Res = VT.isFloatingPoint() 933 ? DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Res) 934 : DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Res); 935 return Res; 936 } 937 938 /// If the input condition is a vector that needs to be scalarized, it must be 939 /// <1 x i1>, so just convert to a normal ISD::SELECT 940 /// (still with vector output type since that was acceptable if we got here). 941 SDValue DAGTypeLegalizer::ScalarizeVecOp_VSELECT(SDNode *N) { 942 SDValue ScalarCond = GetScalarizedVector(N->getOperand(0)); 943 EVT VT = N->getValueType(0); 944 945 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, ScalarCond, N->getOperand(1), 946 N->getOperand(2)); 947 } 948 949 /// If the operand is a vector that needs to be scalarized then the 950 /// result must be v1i1, so just convert to a scalar SETCC and wrap 951 /// with a scalar_to_vector since the res type is legal if we got here 952 SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) { 953 assert(N->getValueType(0).isVector() && 954 N->getOperand(0).getValueType().isVector() && 955 "Operand types must be vectors"); 956 assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type"); 957 958 EVT VT = N->getValueType(0); 959 SDValue LHS = GetScalarizedVector(N->getOperand(0)); 960 SDValue RHS = GetScalarizedVector(N->getOperand(1)); 961 962 EVT OpVT = N->getOperand(0).getValueType(); 963 EVT NVT = VT.getVectorElementType(); 964 SDLoc DL(N); 965 // Turn it into a scalar SETCC. 966 SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, 967 N->getOperand(2)); 968 969 // Vectors may have a different boolean contents to scalars. Promote the 970 // value appropriately. 971 ISD::NodeType ExtendCode = 972 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); 973 974 Res = DAG.getNode(ExtendCode, DL, NVT, Res); 975 976 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res); 977 } 978 979 /// If the value to store is a vector that needs to be scalarized, it must be 980 /// <1 x ty>. Just store the element. 
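/// Truncating stores are preserved by emitting a scalar truncating store with
/// the original element memory type.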
981 SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ 982 assert(N->isUnindexed() && "Indexed store of one-element vector?"); 983 assert(OpNo == 1 && "Do not know how to scalarize this operand!"); 984 SDLoc dl(N); 985 986 if (N->isTruncatingStore()) 987 return DAG.getTruncStore( 988 N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), 989 N->getBasePtr(), N->getPointerInfo(), 990 N->getMemoryVT().getVectorElementType(), N->getOriginalAlign(), 991 N->getMemOperand()->getFlags(), N->getAAInfo()); 992 993 return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), 994 N->getBasePtr(), N->getPointerInfo(), 995 N->getOriginalAlign(), N->getMemOperand()->getFlags(), 996 N->getAAInfo()); 997 } 998 999 /// If the value to round is a vector that needs to be scalarized, it must be 1000 /// <1 x ty>. Convert the element instead. 1001 SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) { 1002 assert(OpNo == 0 && "Wrong operand for scalarization!"); 1003 SDValue Elt = GetScalarizedVector(N->getOperand(0)); 1004 SDValue Res = DAG.getNode(ISD::FP_ROUND, SDLoc(N), 1005 N->getValueType(0).getVectorElementType(), Elt, 1006 N->getOperand(1)); 1007 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); 1008 } 1009 1010 SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, 1011 unsigned OpNo) { 1012 assert(OpNo == 1 && "Wrong operand for scalarization!"); 1013 SDValue Elt = GetScalarizedVector(N->getOperand(1)); 1014 SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N), 1015 { N->getValueType(0).getVectorElementType(), 1016 MVT::Other }, 1017 { N->getOperand(0), Elt, N->getOperand(2) }); 1018 // Legalize the chain result - switch anything that used the old chain to 1019 // use the new one. 1020 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 1021 1022 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); 1023 1024 // Do our own replacement and return SDValue() to tell the caller that we 1025 // handled all replacements since caller can only handle a single result. 1026 ReplaceValueWith(SDValue(N, 0), Res); 1027 return SDValue(); 1028 } 1029 1030 /// If the value to extend is a vector that needs to be scalarized, it must be 1031 /// <1 x ty>. Convert the element instead. 1032 SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_EXTEND(SDNode *N) { 1033 SDValue Elt = GetScalarizedVector(N->getOperand(0)); 1034 SDValue Res = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), 1035 N->getValueType(0).getVectorElementType(), Elt); 1036 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); 1037 } 1038 1039 /// If the value to extend is a vector that needs to be scalarized, it must be 1040 /// <1 x ty>. Convert the element instead. 1041 SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_EXTEND(SDNode *N) { 1042 SDValue Elt = GetScalarizedVector(N->getOperand(1)); 1043 SDValue Res = 1044 DAG.getNode(ISD::STRICT_FP_EXTEND, SDLoc(N), 1045 {N->getValueType(0).getVectorElementType(), MVT::Other}, 1046 {N->getOperand(0), Elt}); 1047 // Legalize the chain result - switch anything that used the old chain to 1048 // use the new one. 1049 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 1050 1051 Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res); 1052 1053 // Do our own replacement and return SDValue() to tell the caller that we 1054 // handled all replacements since caller can only handle a single result. 
1055 ReplaceValueWith(SDValue(N, 0), Res); 1056 return SDValue(); 1057 } 1058 1059 SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) { 1060 SDValue Res = GetScalarizedVector(N->getOperand(0)); 1061 // Result type may be wider than element type. 1062 if (Res.getValueType() != N->getValueType(0)) 1063 Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res); 1064 return Res; 1065 } 1066 1067 SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE_SEQ(SDNode *N) { 1068 SDValue AccOp = N->getOperand(0); 1069 SDValue VecOp = N->getOperand(1); 1070 1071 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(N->getOpcode()); 1072 1073 SDValue Op = GetScalarizedVector(VecOp); 1074 return DAG.getNode(BaseOpc, SDLoc(N), N->getValueType(0), 1075 AccOp, Op, N->getFlags()); 1076 } 1077 1078 SDValue DAGTypeLegalizer::ScalarizeVecOp_CMP(SDNode *N) { 1079 SDValue LHS = GetScalarizedVector(N->getOperand(0)); 1080 SDValue RHS = GetScalarizedVector(N->getOperand(1)); 1081 1082 EVT ResVT = N->getValueType(0).getVectorElementType(); 1083 SDValue Cmp = DAG.getNode(N->getOpcode(), SDLoc(N), ResVT, LHS, RHS); 1084 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Cmp); 1085 } 1086 1087 //===----------------------------------------------------------------------===// 1088 // Result Vector Splitting 1089 //===----------------------------------------------------------------------===// 1090 1091 /// This method is called when the specified result of the specified node is 1092 /// found to need vector splitting. At this point, the node may also have 1093 /// invalid operands or may have other results that need legalization, we just 1094 /// know that (at least) one result needs vector splitting. 1095 void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { 1096 LLVM_DEBUG(dbgs() << "Split node result: "; N->dump(&DAG)); 1097 SDValue Lo, Hi; 1098 1099 // See if the target wants to custom expand this node. 
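// If it does, the custom lowering has already replaced the results, so there
// is nothing more to do here.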
1100 if (CustomLowerNode(N, N->getValueType(ResNo), true)) 1101 return; 1102 1103 switch (N->getOpcode()) { 1104 default: 1105 #ifndef NDEBUG 1106 dbgs() << "SplitVectorResult #" << ResNo << ": "; 1107 N->dump(&DAG); 1108 dbgs() << "\n"; 1109 #endif 1110 report_fatal_error("Do not know how to split the result of this " 1111 "operator!\n"); 1112 1113 case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break; 1114 case ISD::AssertZext: SplitVecRes_AssertZext(N, Lo, Hi); break; 1115 case ISD::VSELECT: 1116 case ISD::SELECT: 1117 case ISD::VP_MERGE: 1118 case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break; 1119 case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break; 1120 case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break; 1121 case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break; 1122 case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; 1123 case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; 1124 case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; 1125 case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; 1126 case ISD::FPOWI: 1127 case ISD::FLDEXP: 1128 case ISD::FCOPYSIGN: SplitVecRes_FPOp_MultiType(N, Lo, Hi); break; 1129 case ISD::IS_FPCLASS: SplitVecRes_IS_FPCLASS(N, Lo, Hi); break; 1130 case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; 1131 case ISD::EXPERIMENTAL_VP_SPLAT: SplitVecRes_VP_SPLAT(N, Lo, Hi); break; 1132 case ISD::SPLAT_VECTOR: 1133 case ISD::SCALAR_TO_VECTOR: 1134 SplitVecRes_ScalarOp(N, Lo, Hi); 1135 break; 1136 case ISD::STEP_VECTOR: 1137 SplitVecRes_STEP_VECTOR(N, Lo, Hi); 1138 break; 1139 case ISD::SIGN_EXTEND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; 1140 case ISD::LOAD: 1141 SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); 1142 break; 1143 case ISD::VP_LOAD: 1144 SplitVecRes_VP_LOAD(cast<VPLoadSDNode>(N), Lo, Hi); 1145 break; 1146 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: 1147 SplitVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N), Lo, Hi); 1148 break; 1149 case ISD::MLOAD: 1150 SplitVecRes_MLOAD(cast<MaskedLoadSDNode>(N), Lo, Hi); 1151 break; 1152 case ISD::MGATHER: 1153 case ISD::VP_GATHER: 1154 SplitVecRes_Gather(cast<MemSDNode>(N), Lo, Hi, /*SplitSETCC*/ true); 1155 break; 1156 case ISD::VECTOR_COMPRESS: 1157 SplitVecRes_VECTOR_COMPRESS(N, Lo, Hi); 1158 break; 1159 case ISD::SETCC: 1160 case ISD::VP_SETCC: 1161 SplitVecRes_SETCC(N, Lo, Hi); 1162 break; 1163 case ISD::VECTOR_REVERSE: 1164 SplitVecRes_VECTOR_REVERSE(N, Lo, Hi); 1165 break; 1166 case ISD::VECTOR_SHUFFLE: 1167 SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi); 1168 break; 1169 case ISD::VECTOR_SPLICE: 1170 SplitVecRes_VECTOR_SPLICE(N, Lo, Hi); 1171 break; 1172 case ISD::VECTOR_DEINTERLEAVE: 1173 SplitVecRes_VECTOR_DEINTERLEAVE(N); 1174 return; 1175 case ISD::VECTOR_INTERLEAVE: 1176 SplitVecRes_VECTOR_INTERLEAVE(N); 1177 return; 1178 case ISD::VAARG: 1179 SplitVecRes_VAARG(N, Lo, Hi); 1180 break; 1181 1182 case ISD::ANY_EXTEND_VECTOR_INREG: 1183 case ISD::SIGN_EXTEND_VECTOR_INREG: 1184 case ISD::ZERO_EXTEND_VECTOR_INREG: 1185 SplitVecRes_ExtVecInRegOp(N, Lo, Hi); 1186 break; 1187 1188 case ISD::ABS: 1189 case ISD::VP_ABS: 1190 case ISD::BITREVERSE: 1191 case ISD::VP_BITREVERSE: 1192 case ISD::BSWAP: 1193 case ISD::VP_BSWAP: 1194 case ISD::CTLZ: 1195 case ISD::VP_CTLZ: 1196 case ISD::CTTZ: 1197 case ISD::VP_CTTZ: 1198 case ISD::CTLZ_ZERO_UNDEF: 1199 case ISD::VP_CTLZ_ZERO_UNDEF: 1200 case ISD::CTTZ_ZERO_UNDEF: 1201 case ISD::VP_CTTZ_ZERO_UNDEF: 1202 case 
ISD::CTPOP: 1203 case ISD::VP_CTPOP: 1204 case ISD::FABS: case ISD::VP_FABS: 1205 case ISD::FACOS: 1206 case ISD::FASIN: 1207 case ISD::FATAN: 1208 case ISD::FCEIL: 1209 case ISD::VP_FCEIL: 1210 case ISD::FCOS: 1211 case ISD::FCOSH: 1212 case ISD::FEXP: 1213 case ISD::FEXP2: 1214 case ISD::FEXP10: 1215 case ISD::FFLOOR: 1216 case ISD::VP_FFLOOR: 1217 case ISD::FLOG: 1218 case ISD::FLOG10: 1219 case ISD::FLOG2: 1220 case ISD::FNEARBYINT: 1221 case ISD::VP_FNEARBYINT: 1222 case ISD::FNEG: case ISD::VP_FNEG: 1223 case ISD::FREEZE: 1224 case ISD::ARITH_FENCE: 1225 case ISD::FP_EXTEND: 1226 case ISD::VP_FP_EXTEND: 1227 case ISD::FP_ROUND: 1228 case ISD::VP_FP_ROUND: 1229 case ISD::FP_TO_SINT: 1230 case ISD::VP_FP_TO_SINT: 1231 case ISD::FP_TO_UINT: 1232 case ISD::VP_FP_TO_UINT: 1233 case ISD::FRINT: 1234 case ISD::VP_FRINT: 1235 case ISD::LRINT: 1236 case ISD::VP_LRINT: 1237 case ISD::LLRINT: 1238 case ISD::VP_LLRINT: 1239 case ISD::FROUND: 1240 case ISD::VP_FROUND: 1241 case ISD::FROUNDEVEN: 1242 case ISD::VP_FROUNDEVEN: 1243 case ISD::LROUND: 1244 case ISD::LLROUND: 1245 case ISD::FSIN: 1246 case ISD::FSINH: 1247 case ISD::FSQRT: case ISD::VP_SQRT: 1248 case ISD::FTAN: 1249 case ISD::FTANH: 1250 case ISD::FTRUNC: 1251 case ISD::VP_FROUNDTOZERO: 1252 case ISD::SINT_TO_FP: 1253 case ISD::VP_SINT_TO_FP: 1254 case ISD::TRUNCATE: 1255 case ISD::VP_TRUNCATE: 1256 case ISD::UINT_TO_FP: 1257 case ISD::VP_UINT_TO_FP: 1258 case ISD::FCANONICALIZE: 1259 SplitVecRes_UnaryOp(N, Lo, Hi); 1260 break; 1261 case ISD::ADDRSPACECAST: 1262 SplitVecRes_ADDRSPACECAST(N, Lo, Hi); 1263 break; 1264 case ISD::FFREXP: 1265 case ISD::FSINCOS: 1266 SplitVecRes_UnaryOpWithTwoResults(N, ResNo, Lo, Hi); 1267 break; 1268 1269 case ISD::ANY_EXTEND: 1270 case ISD::SIGN_EXTEND: 1271 case ISD::ZERO_EXTEND: 1272 case ISD::VP_SIGN_EXTEND: 1273 case ISD::VP_ZERO_EXTEND: 1274 SplitVecRes_ExtendOp(N, Lo, Hi); 1275 break; 1276 1277 case ISD::ADD: case ISD::VP_ADD: 1278 case ISD::SUB: case ISD::VP_SUB: 1279 case ISD::MUL: case ISD::VP_MUL: 1280 case ISD::MULHS: 1281 case ISD::MULHU: 1282 case ISD::ABDS: 1283 case ISD::ABDU: 1284 case ISD::AVGCEILS: 1285 case ISD::AVGCEILU: 1286 case ISD::AVGFLOORS: 1287 case ISD::AVGFLOORU: 1288 case ISD::FADD: case ISD::VP_FADD: 1289 case ISD::FSUB: case ISD::VP_FSUB: 1290 case ISD::FMUL: case ISD::VP_FMUL: 1291 case ISD::FMINNUM: 1292 case ISD::FMINNUM_IEEE: 1293 case ISD::VP_FMINNUM: 1294 case ISD::FMAXNUM: 1295 case ISD::FMAXNUM_IEEE: 1296 case ISD::VP_FMAXNUM: 1297 case ISD::FMINIMUM: 1298 case ISD::VP_FMINIMUM: 1299 case ISD::FMAXIMUM: 1300 case ISD::VP_FMAXIMUM: 1301 case ISD::FMINIMUMNUM: 1302 case ISD::FMAXIMUMNUM: 1303 case ISD::SDIV: case ISD::VP_SDIV: 1304 case ISD::UDIV: case ISD::VP_UDIV: 1305 case ISD::FDIV: case ISD::VP_FDIV: 1306 case ISD::FPOW: 1307 case ISD::FATAN2: 1308 case ISD::AND: case ISD::VP_AND: 1309 case ISD::OR: case ISD::VP_OR: 1310 case ISD::XOR: case ISD::VP_XOR: 1311 case ISD::SHL: case ISD::VP_SHL: 1312 case ISD::SRA: case ISD::VP_SRA: 1313 case ISD::SRL: case ISD::VP_SRL: 1314 case ISD::UREM: case ISD::VP_UREM: 1315 case ISD::SREM: case ISD::VP_SREM: 1316 case ISD::FREM: case ISD::VP_FREM: 1317 case ISD::SMIN: case ISD::VP_SMIN: 1318 case ISD::SMAX: case ISD::VP_SMAX: 1319 case ISD::UMIN: case ISD::VP_UMIN: 1320 case ISD::UMAX: case ISD::VP_UMAX: 1321 case ISD::SADDSAT: case ISD::VP_SADDSAT: 1322 case ISD::UADDSAT: case ISD::VP_UADDSAT: 1323 case ISD::SSUBSAT: case ISD::VP_SSUBSAT: 1324 case ISD::USUBSAT: case ISD::VP_USUBSAT: 1325 case ISD::SSHLSAT: 1326 case 
ISD::USHLSAT: 1327 case ISD::ROTL: 1328 case ISD::ROTR: 1329 case ISD::VP_FCOPYSIGN: 1330 SplitVecRes_BinOp(N, Lo, Hi); 1331 break; 1332 case ISD::FMA: case ISD::VP_FMA: 1333 case ISD::FSHL: 1334 case ISD::VP_FSHL: 1335 case ISD::FSHR: 1336 case ISD::VP_FSHR: 1337 SplitVecRes_TernaryOp(N, Lo, Hi); 1338 break; 1339 1340 case ISD::SCMP: case ISD::UCMP: 1341 SplitVecRes_CMP(N, Lo, Hi); 1342 break; 1343 1344 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 1345 case ISD::STRICT_##DAGN: 1346 #include "llvm/IR/ConstrainedOps.def" 1347 SplitVecRes_StrictFPOp(N, Lo, Hi); 1348 break; 1349 1350 case ISD::FP_TO_UINT_SAT: 1351 case ISD::FP_TO_SINT_SAT: 1352 SplitVecRes_FP_TO_XINT_SAT(N, Lo, Hi); 1353 break; 1354 1355 case ISD::UADDO: 1356 case ISD::SADDO: 1357 case ISD::USUBO: 1358 case ISD::SSUBO: 1359 case ISD::UMULO: 1360 case ISD::SMULO: 1361 SplitVecRes_OverflowOp(N, ResNo, Lo, Hi); 1362 break; 1363 case ISD::SMULFIX: 1364 case ISD::SMULFIXSAT: 1365 case ISD::UMULFIX: 1366 case ISD::UMULFIXSAT: 1367 case ISD::SDIVFIX: 1368 case ISD::SDIVFIXSAT: 1369 case ISD::UDIVFIX: 1370 case ISD::UDIVFIXSAT: 1371 SplitVecRes_FIX(N, Lo, Hi); 1372 break; 1373 case ISD::EXPERIMENTAL_VP_REVERSE: 1374 SplitVecRes_VP_REVERSE(N, Lo, Hi); 1375 break; 1376 } 1377 1378 // If Lo/Hi is null, the sub-method took care of registering results etc. 1379 if (Lo.getNode()) 1380 SetSplitVector(SDValue(N, ResNo), Lo, Hi); 1381 } 1382 1383 void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT, 1384 MachinePointerInfo &MPI, SDValue &Ptr, 1385 uint64_t *ScaledOffset) { 1386 SDLoc DL(N); 1387 unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinValue() / 8; 1388 1389 if (MemVT.isScalableVector()) { 1390 SDValue BytesIncrement = DAG.getVScale( 1391 DL, Ptr.getValueType(), 1392 APInt(Ptr.getValueSizeInBits().getFixedValue(), IncrementSize)); 1393 MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); 1394 if (ScaledOffset) 1395 *ScaledOffset += IncrementSize; 1396 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement, 1397 SDNodeFlags::NoUnsignedWrap); 1398 } else { 1399 MPI = N->getPointerInfo().getWithOffset(IncrementSize); 1400 // Increment the pointer to the other half. 
1401 Ptr = DAG.getObjectPtrOffset(DL, Ptr, TypeSize::getFixed(IncrementSize)); 1402 } 1403 } 1404 1405 std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask) { 1406 return SplitMask(Mask, SDLoc(Mask)); 1407 } 1408 1409 std::pair<SDValue, SDValue> DAGTypeLegalizer::SplitMask(SDValue Mask, 1410 const SDLoc &DL) { 1411 SDValue MaskLo, MaskHi; 1412 EVT MaskVT = Mask.getValueType(); 1413 if (getTypeAction(MaskVT) == TargetLowering::TypeSplitVector) 1414 GetSplitVector(Mask, MaskLo, MaskHi); 1415 else 1416 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); 1417 return std::make_pair(MaskLo, MaskHi); 1418 } 1419 1420 void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi) { 1421 SDValue LHSLo, LHSHi; 1422 GetSplitVector(N->getOperand(0), LHSLo, LHSHi); 1423 SDValue RHSLo, RHSHi; 1424 GetSplitVector(N->getOperand(1), RHSLo, RHSHi); 1425 SDLoc dl(N); 1426 1427 const SDNodeFlags Flags = N->getFlags(); 1428 unsigned Opcode = N->getOpcode(); 1429 if (N->getNumOperands() == 2) { 1430 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Flags); 1431 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Flags); 1432 return; 1433 } 1434 1435 assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); 1436 assert(N->isVPOpcode() && "Expected VP opcode"); 1437 1438 SDValue MaskLo, MaskHi; 1439 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2)); 1440 1441 SDValue EVLLo, EVLHi; 1442 std::tie(EVLLo, EVLHi) = 1443 DAG.SplitEVL(N->getOperand(3), N->getValueType(0), dl); 1444 1445 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), 1446 {LHSLo, RHSLo, MaskLo, EVLLo}, Flags); 1447 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), 1448 {LHSHi, RHSHi, MaskHi, EVLHi}, Flags); 1449 } 1450 1451 void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, 1452 SDValue &Hi) { 1453 SDValue Op0Lo, Op0Hi; 1454 GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); 1455 SDValue Op1Lo, Op1Hi; 1456 GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); 1457 SDValue Op2Lo, Op2Hi; 1458 GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); 1459 SDLoc dl(N); 1460 1461 const SDNodeFlags Flags = N->getFlags(); 1462 unsigned Opcode = N->getOpcode(); 1463 if (N->getNumOperands() == 3) { 1464 Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo, Flags); 1465 Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), Op0Hi, Op1Hi, Op2Hi, Flags); 1466 return; 1467 } 1468 1469 assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); 1470 assert(N->isVPOpcode() && "Expected VP opcode"); 1471 1472 SDValue MaskLo, MaskHi; 1473 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); 1474 1475 SDValue EVLLo, EVLHi; 1476 std::tie(EVLLo, EVLHi) = 1477 DAG.SplitEVL(N->getOperand(4), N->getValueType(0), dl); 1478 1479 Lo = DAG.getNode(Opcode, dl, Op0Lo.getValueType(), 1480 {Op0Lo, Op1Lo, Op2Lo, MaskLo, EVLLo}, Flags); 1481 Hi = DAG.getNode(Opcode, dl, Op0Hi.getValueType(), 1482 {Op0Hi, Op1Hi, Op2Hi, MaskHi, EVLHi}, Flags); 1483 } 1484 1485 void DAGTypeLegalizer::SplitVecRes_CMP(SDNode *N, SDValue &Lo, SDValue &Hi) { 1486 LLVMContext &Ctxt = *DAG.getContext(); 1487 SDLoc dl(N); 1488 1489 SDValue LHS = N->getOperand(0); 1490 SDValue RHS = N->getOperand(1); 1491 1492 SDValue LHSLo, LHSHi, RHSLo, RHSHi; 1493 if (getTypeAction(LHS.getValueType()) == TargetLowering::TypeSplitVector) { 1494 GetSplitVector(LHS, LHSLo, LHSHi); 1495 GetSplitVector(RHS, RHSLo, RHSHi); 1496 } else { 1497 std::tie(LHSLo, LHSHi) = DAG.SplitVector(LHS, dl); 1498 std::tie(RHSLo, RHSHi) = 
DAG.SplitVector(RHS, dl); 1499 } 1500 1501 EVT SplitResVT = N->getValueType(0).getHalfNumVectorElementsVT(Ctxt); 1502 Lo = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSLo, RHSLo); 1503 Hi = DAG.getNode(N->getOpcode(), dl, SplitResVT, LHSHi, RHSHi); 1504 } 1505 1506 void DAGTypeLegalizer::SplitVecRes_FIX(SDNode *N, SDValue &Lo, SDValue &Hi) { 1507 SDValue LHSLo, LHSHi; 1508 GetSplitVector(N->getOperand(0), LHSLo, LHSHi); 1509 SDValue RHSLo, RHSHi; 1510 GetSplitVector(N->getOperand(1), RHSLo, RHSHi); 1511 SDLoc dl(N); 1512 SDValue Op2 = N->getOperand(2); 1513 1514 unsigned Opcode = N->getOpcode(); 1515 Lo = DAG.getNode(Opcode, dl, LHSLo.getValueType(), LHSLo, RHSLo, Op2, 1516 N->getFlags()); 1517 Hi = DAG.getNode(Opcode, dl, LHSHi.getValueType(), LHSHi, RHSHi, Op2, 1518 N->getFlags()); 1519 } 1520 1521 void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, 1522 SDValue &Hi) { 1523 // We know the result is a vector. The input may be either a vector or a 1524 // scalar value. 1525 EVT LoVT, HiVT; 1526 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 1527 SDLoc dl(N); 1528 1529 SDValue InOp = N->getOperand(0); 1530 EVT InVT = InOp.getValueType(); 1531 1532 // Handle some special cases efficiently. 1533 switch (getTypeAction(InVT)) { 1534 case TargetLowering::TypeLegal: 1535 case TargetLowering::TypePromoteInteger: 1536 case TargetLowering::TypePromoteFloat: 1537 case TargetLowering::TypeSoftPromoteHalf: 1538 case TargetLowering::TypeSoftenFloat: 1539 case TargetLowering::TypeScalarizeVector: 1540 case TargetLowering::TypeWidenVector: 1541 break; 1542 case TargetLowering::TypeExpandInteger: 1543 case TargetLowering::TypeExpandFloat: 1544 // A scalar to vector conversion, where the scalar needs expansion. 1545 // If the vector is being split in two then we can just convert the 1546 // expanded pieces. 1547 if (LoVT == HiVT) { 1548 GetExpandedOp(InOp, Lo, Hi); 1549 if (DAG.getDataLayout().isBigEndian()) 1550 std::swap(Lo, Hi); 1551 Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); 1552 Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); 1553 return; 1554 } 1555 break; 1556 case TargetLowering::TypeSplitVector: 1557 // If the input is a vector that needs to be split, convert each split 1558 // piece of the input now. 1559 GetSplitVector(InOp, Lo, Hi); 1560 Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); 1561 Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); 1562 return; 1563 case TargetLowering::TypeScalarizeScalableVector: 1564 report_fatal_error("Scalarization of scalable vectors is not supported."); 1565 } 1566 1567 if (LoVT.isScalableVector()) { 1568 auto [InLo, InHi] = DAG.SplitVectorOperand(N, 0); 1569 Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, InLo); 1570 Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, InHi); 1571 return; 1572 } 1573 1574 // In the general case, convert the input to an integer and split it by hand. 
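// The integer halves are then bitcast to the low and high destination vector
// types, swapping the halves on big-endian targets.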
1575 EVT LoIntVT = EVT::getIntegerVT(*DAG.getContext(), LoVT.getSizeInBits()); 1576 EVT HiIntVT = EVT::getIntegerVT(*DAG.getContext(), HiVT.getSizeInBits()); 1577 if (DAG.getDataLayout().isBigEndian()) 1578 std::swap(LoIntVT, HiIntVT); 1579 1580 SplitInteger(BitConvertToInteger(InOp), LoIntVT, HiIntVT, Lo, Hi); 1581 1582 if (DAG.getDataLayout().isBigEndian()) 1583 std::swap(Lo, Hi); 1584 Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); 1585 Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); 1586 } 1587 1588 void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, 1589 SDValue &Hi) { 1590 EVT LoVT, HiVT; 1591 SDLoc dl(N); 1592 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 1593 unsigned LoNumElts = LoVT.getVectorNumElements(); 1594 SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+LoNumElts); 1595 Lo = DAG.getBuildVector(LoVT, dl, LoOps); 1596 1597 SmallVector<SDValue, 8> HiOps(N->op_begin()+LoNumElts, N->op_end()); 1598 Hi = DAG.getBuildVector(HiVT, dl, HiOps); 1599 } 1600 1601 void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, 1602 SDValue &Hi) { 1603 assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); 1604 SDLoc dl(N); 1605 unsigned NumSubvectors = N->getNumOperands() / 2; 1606 if (NumSubvectors == 1) { 1607 Lo = N->getOperand(0); 1608 Hi = N->getOperand(1); 1609 return; 1610 } 1611 1612 EVT LoVT, HiVT; 1613 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 1614 1615 SmallVector<SDValue, 8> LoOps(N->op_begin(), N->op_begin()+NumSubvectors); 1616 Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, LoOps); 1617 1618 SmallVector<SDValue, 8> HiOps(N->op_begin()+NumSubvectors, N->op_end()); 1619 Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HiVT, HiOps); 1620 } 1621 1622 void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, 1623 SDValue &Hi) { 1624 SDValue Vec = N->getOperand(0); 1625 SDValue Idx = N->getOperand(1); 1626 SDLoc dl(N); 1627 1628 EVT LoVT, HiVT; 1629 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 1630 1631 Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); 1632 uint64_t IdxVal = Idx->getAsZExtVal(); 1633 Hi = DAG.getNode( 1634 ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, 1635 DAG.getVectorIdxConstant(IdxVal + LoVT.getVectorMinNumElements(), dl)); 1636 } 1637 1638 void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, 1639 SDValue &Hi) { 1640 SDValue Vec = N->getOperand(0); 1641 SDValue SubVec = N->getOperand(1); 1642 SDValue Idx = N->getOperand(2); 1643 SDLoc dl(N); 1644 GetSplitVector(Vec, Lo, Hi); 1645 1646 EVT VecVT = Vec.getValueType(); 1647 EVT LoVT = Lo.getValueType(); 1648 EVT SubVecVT = SubVec.getValueType(); 1649 unsigned VecElems = VecVT.getVectorMinNumElements(); 1650 unsigned SubElems = SubVecVT.getVectorMinNumElements(); 1651 unsigned LoElems = LoVT.getVectorMinNumElements(); 1652 1653 // If we know the index is in the first half, and we know the subvector 1654 // doesn't cross the boundary between the halves, we can avoid spilling the 1655 // vector, and insert into the lower half of the split vector directly. 1656 unsigned IdxVal = Idx->getAsZExtVal(); 1657 if (IdxVal + SubElems <= LoElems) { 1658 Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx); 1659 return; 1660 } 1661 // Similarly if the subvector is fully in the high half, but mind that we 1662 // can't tell whether a fixed-length subvector is fully within the high half 1663 // of a scalable vector. 
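// Illustrative note (example values assumed): with Vec = <vscale x 4 x i32>
// split into two <vscale x 2 x i32> halves, a fixed <2 x i32> subvector
// inserted at index 2 starts in the high half only when vscale == 1, so the
// check below requires Vec and SubVec to agree on scalability before taking
// the high-half fast path.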
1664 if (VecVT.isScalableVector() == SubVecVT.isScalableVector() && 1665 IdxVal >= LoElems && IdxVal + SubElems <= VecElems) { 1666 Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec, 1667 DAG.getVectorIdxConstant(IdxVal - LoElems, dl)); 1668 return; 1669 } 1670 1671 // Spill the vector to the stack. 1672 // In cases where the vector is illegal it will be broken down into parts 1673 // and stored in parts - we should use the alignment for the smallest part. 1674 Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); 1675 SDValue StackPtr = 1676 DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); 1677 auto &MF = DAG.getMachineFunction(); 1678 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); 1679 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); 1680 1681 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, 1682 SmallestAlign); 1683 1684 // Store the new subvector into the specified index. 1685 SDValue SubVecPtr = 1686 TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVecVT, Idx); 1687 Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, 1688 MachinePointerInfo::getUnknownStack(MF)); 1689 1690 // Load the Lo part from the stack slot. 1691 Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, PtrInfo, 1692 SmallestAlign); 1693 1694 // Increment the pointer to the other part. 1695 auto *Load = cast<LoadSDNode>(Lo); 1696 MachinePointerInfo MPI = Load->getPointerInfo(); 1697 IncrementPointer(Load, LoVT, MPI, StackPtr); 1698 1699 // Load the Hi part from the stack slot. 1700 Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MPI, SmallestAlign); 1701 } 1702 1703 // Handle splitting an FP where the second operand does not match the first 1704 // type. 
The second operand may be a scalar, or a vector that has exactly as 1705 // many elements as the first 1706 void DAGTypeLegalizer::SplitVecRes_FPOp_MultiType(SDNode *N, SDValue &Lo, 1707 SDValue &Hi) { 1708 SDValue LHSLo, LHSHi; 1709 GetSplitVector(N->getOperand(0), LHSLo, LHSHi); 1710 SDLoc DL(N); 1711 1712 SDValue RHSLo, RHSHi; 1713 SDValue RHS = N->getOperand(1); 1714 EVT RHSVT = RHS.getValueType(); 1715 if (RHSVT.isVector()) { 1716 if (getTypeAction(RHSVT) == TargetLowering::TypeSplitVector) 1717 GetSplitVector(RHS, RHSLo, RHSHi); 1718 else 1719 std::tie(RHSLo, RHSHi) = DAG.SplitVector(RHS, SDLoc(RHS)); 1720 1721 Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHSLo); 1722 Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHSHi); 1723 } else { 1724 Lo = DAG.getNode(N->getOpcode(), DL, LHSLo.getValueType(), LHSLo, RHS); 1725 Hi = DAG.getNode(N->getOpcode(), DL, LHSHi.getValueType(), LHSHi, RHS); 1726 } 1727 } 1728 1729 void DAGTypeLegalizer::SplitVecRes_IS_FPCLASS(SDNode *N, SDValue &Lo, 1730 SDValue &Hi) { 1731 SDLoc DL(N); 1732 SDValue ArgLo, ArgHi; 1733 SDValue Test = N->getOperand(1); 1734 SDValue FpValue = N->getOperand(0); 1735 if (getTypeAction(FpValue.getValueType()) == TargetLowering::TypeSplitVector) 1736 GetSplitVector(FpValue, ArgLo, ArgHi); 1737 else 1738 std::tie(ArgLo, ArgHi) = DAG.SplitVector(FpValue, SDLoc(FpValue)); 1739 EVT LoVT, HiVT; 1740 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 1741 1742 Lo = DAG.getNode(ISD::IS_FPCLASS, DL, LoVT, ArgLo, Test, N->getFlags()); 1743 Hi = DAG.getNode(ISD::IS_FPCLASS, DL, HiVT, ArgHi, Test, N->getFlags()); 1744 } 1745 1746 void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, 1747 SDValue &Hi) { 1748 SDValue LHSLo, LHSHi; 1749 GetSplitVector(N->getOperand(0), LHSLo, LHSHi); 1750 SDLoc dl(N); 1751 1752 EVT LoVT, HiVT; 1753 std::tie(LoVT, HiVT) = 1754 DAG.GetSplitDestVTs(cast<VTSDNode>(N->getOperand(1))->getVT()); 1755 1756 Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, 1757 DAG.getValueType(LoVT)); 1758 Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, 1759 DAG.getValueType(HiVT)); 1760 } 1761 1762 void DAGTypeLegalizer::SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, 1763 SDValue &Hi) { 1764 unsigned Opcode = N->getOpcode(); 1765 SDValue N0 = N->getOperand(0); 1766 1767 SDLoc dl(N); 1768 SDValue InLo, InHi; 1769 1770 if (getTypeAction(N0.getValueType()) == TargetLowering::TypeSplitVector) 1771 GetSplitVector(N0, InLo, InHi); 1772 else 1773 std::tie(InLo, InHi) = DAG.SplitVectorOperand(N, 0); 1774 1775 EVT InLoVT = InLo.getValueType(); 1776 unsigned InNumElements = InLoVT.getVectorNumElements(); 1777 1778 EVT OutLoVT, OutHiVT; 1779 std::tie(OutLoVT, OutHiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 1780 unsigned OutNumElements = OutLoVT.getVectorNumElements(); 1781 assert((2 * OutNumElements) <= InNumElements && 1782 "Illegal extend vector in reg split"); 1783 1784 // *_EXTEND_VECTOR_INREG instructions extend the lowest elements of the 1785 // input vector (i.e. we only use InLo): 1786 // OutLo will extend the first OutNumElements from InLo. 1787 // OutHi will extend the next OutNumElements from InLo. 1788 1789 // Shuffle the elements from InLo for OutHi into the bottom elements to 1790 // create a 'fake' InHi. 
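// Illustrative example (values assumed): with InNumElements == 8 and
// OutNumElements == 2, SplitHi becomes <2, 3, -1, -1, -1, -1, -1, -1>, so the
// shuffle below moves InLo elements 2 and 3 down into lanes 0 and 1.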
1791 SmallVector<int, 8> SplitHi(InNumElements, -1); 1792 for (unsigned i = 0; i != OutNumElements; ++i) 1793 SplitHi[i] = i + OutNumElements; 1794 InHi = DAG.getVectorShuffle(InLoVT, dl, InLo, DAG.getUNDEF(InLoVT), SplitHi); 1795 1796 Lo = DAG.getNode(Opcode, dl, OutLoVT, InLo); 1797 Hi = DAG.getNode(Opcode, dl, OutHiVT, InHi); 1798 } 1799 1800 void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, 1801 SDValue &Hi) { 1802 unsigned NumOps = N->getNumOperands(); 1803 SDValue Chain = N->getOperand(0); 1804 EVT LoVT, HiVT; 1805 SDLoc dl(N); 1806 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 1807 1808 SmallVector<SDValue, 4> OpsLo(NumOps); 1809 SmallVector<SDValue, 4> OpsHi(NumOps); 1810 1811 // The Chain is the first operand. 1812 OpsLo[0] = Chain; 1813 OpsHi[0] = Chain; 1814 1815 // Now process the remaining operands. 1816 for (unsigned i = 1; i < NumOps; ++i) { 1817 SDValue Op = N->getOperand(i); 1818 SDValue OpLo = Op; 1819 SDValue OpHi = Op; 1820 1821 EVT InVT = Op.getValueType(); 1822 if (InVT.isVector()) { 1823 // If the input also splits, handle it directly for a 1824 // compile time speedup. Otherwise split it by hand. 1825 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) 1826 GetSplitVector(Op, OpLo, OpHi); 1827 else 1828 std::tie(OpLo, OpHi) = DAG.SplitVectorOperand(N, i); 1829 } 1830 1831 OpsLo[i] = OpLo; 1832 OpsHi[i] = OpHi; 1833 } 1834 1835 EVT LoValueVTs[] = {LoVT, MVT::Other}; 1836 EVT HiValueVTs[] = {HiVT, MVT::Other}; 1837 Lo = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(LoValueVTs), OpsLo, 1838 N->getFlags()); 1839 Hi = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(HiValueVTs), OpsHi, 1840 N->getFlags()); 1841 1842 // Build a factor node to remember that this Op is independent of the 1843 // other one. 1844 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1845 Lo.getValue(1), Hi.getValue(1)); 1846 1847 // Legalize the chain result - switch anything that used the old chain to 1848 // use the new one. 1849 ReplaceValueWith(SDValue(N, 1), Chain); 1850 } 1851 1852 SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) { 1853 SDValue Chain = N->getOperand(0); 1854 EVT VT = N->getValueType(0); 1855 unsigned NE = VT.getVectorNumElements(); 1856 EVT EltVT = VT.getVectorElementType(); 1857 SDLoc dl(N); 1858 1859 SmallVector<SDValue, 8> Scalars; 1860 SmallVector<SDValue, 4> Operands(N->getNumOperands()); 1861 1862 // If ResNE is 0, fully unroll the vector op. 1863 if (ResNE == 0) 1864 ResNE = NE; 1865 else if (NE > ResNE) 1866 NE = ResNE; 1867 1868 //The results of each unrolled operation, including the chain. 1869 EVT ChainVTs[] = {EltVT, MVT::Other}; 1870 SmallVector<SDValue, 8> Chains; 1871 1872 unsigned i; 1873 for (i = 0; i != NE; ++i) { 1874 Operands[0] = Chain; 1875 for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) { 1876 SDValue Operand = N->getOperand(j); 1877 EVT OperandVT = Operand.getValueType(); 1878 if (OperandVT.isVector()) { 1879 EVT OperandEltVT = OperandVT.getVectorElementType(); 1880 Operands[j] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, 1881 Operand, DAG.getVectorIdxConstant(i, dl)); 1882 } else { 1883 Operands[j] = Operand; 1884 } 1885 } 1886 SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands); 1887 Scalar.getNode()->setFlags(N->getFlags()); 1888 1889 //Add in the scalar as well as its chain value to the 1890 //result vectors. 
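// (Illustrative note: when ResNE is larger than the number of source elements
// NE, the loop after this one pads the remaining result lanes with UNDEF
// before the BUILD_VECTOR is formed.)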
1891 Scalars.push_back(Scalar); 1892 Chains.push_back(Scalar.getValue(1)); 1893 } 1894 1895 for (; i < ResNE; ++i) 1896 Scalars.push_back(DAG.getUNDEF(EltVT)); 1897 1898 // Build a new factor node to connect the chain back together. 1899 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); 1900 ReplaceValueWith(SDValue(N, 1), Chain); 1901 1902 // Create a new BUILD_VECTOR node 1903 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE); 1904 return DAG.getBuildVector(VecVT, dl, Scalars); 1905 } 1906 1907 void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo, 1908 SDValue &Lo, SDValue &Hi) { 1909 SDLoc dl(N); 1910 EVT ResVT = N->getValueType(0); 1911 EVT OvVT = N->getValueType(1); 1912 EVT LoResVT, HiResVT, LoOvVT, HiOvVT; 1913 std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT); 1914 std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT); 1915 1916 SDValue LoLHS, HiLHS, LoRHS, HiRHS; 1917 if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) { 1918 GetSplitVector(N->getOperand(0), LoLHS, HiLHS); 1919 GetSplitVector(N->getOperand(1), LoRHS, HiRHS); 1920 } else { 1921 std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0); 1922 std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1); 1923 } 1924 1925 unsigned Opcode = N->getOpcode(); 1926 SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT); 1927 SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT); 1928 SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode(); 1929 SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode(); 1930 LoNode->setFlags(N->getFlags()); 1931 HiNode->setFlags(N->getFlags()); 1932 1933 Lo = SDValue(LoNode, ResNo); 1934 Hi = SDValue(HiNode, ResNo); 1935 1936 // Replace the other vector result not being explicitly split here. 1937 unsigned OtherNo = 1 - ResNo; 1938 EVT OtherVT = N->getValueType(OtherNo); 1939 if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) { 1940 SetSplitVector(SDValue(N, OtherNo), 1941 SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo)); 1942 } else { 1943 SDValue OtherVal = DAG.getNode( 1944 ISD::CONCAT_VECTORS, dl, OtherVT, 1945 SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo)); 1946 ReplaceValueWith(SDValue(N, OtherNo), OtherVal); 1947 } 1948 } 1949 1950 void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, 1951 SDValue &Hi) { 1952 SDValue Vec = N->getOperand(0); 1953 SDValue Elt = N->getOperand(1); 1954 SDValue Idx = N->getOperand(2); 1955 SDLoc dl(N); 1956 GetSplitVector(Vec, Lo, Hi); 1957 1958 if (ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx)) { 1959 unsigned IdxVal = CIdx->getZExtValue(); 1960 unsigned LoNumElts = Lo.getValueType().getVectorMinNumElements(); 1961 if (IdxVal < LoNumElts) { 1962 Lo = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, 1963 Lo.getValueType(), Lo, Elt, Idx); 1964 return; 1965 } else if (!Vec.getValueType().isScalableVector()) { 1966 Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, 1967 DAG.getVectorIdxConstant(IdxVal - LoNumElts, dl)); 1968 return; 1969 } 1970 } 1971 1972 // Make the vector elements byte-addressable if they aren't already. 1973 EVT VecVT = Vec.getValueType(); 1974 EVT EltVT = VecVT.getVectorElementType(); 1975 if (!EltVT.isByteSized()) { 1976 EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext()); 1977 VecVT = VecVT.changeElementType(EltVT); 1978 Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); 1979 // Extend the element type to match if needed. 
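// Illustrative example (types assumed): a v8i1 vector is widened to v8i8 so
// each lane is byte-addressable in the stack slot, and an i1 element value is
// any-extended to i8 here to match the widened element type.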
1980 if (EltVT.bitsGT(Elt.getValueType())) 1981 Elt = DAG.getNode(ISD::ANY_EXTEND, dl, EltVT, Elt); 1982 } 1983 1984 // Spill the vector to the stack. 1985 // In cases where the vector is illegal it will be broken down into parts 1986 // and stored in parts - we should use the alignment for the smallest part. 1987 Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); 1988 SDValue StackPtr = 1989 DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); 1990 auto &MF = DAG.getMachineFunction(); 1991 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); 1992 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); 1993 1994 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, 1995 SmallestAlign); 1996 1997 // Store the new element. This may be larger than the vector element type, 1998 // so use a truncating store. 1999 SDValue EltPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); 2000 Store = DAG.getTruncStore( 2001 Store, dl, Elt, EltPtr, MachinePointerInfo::getUnknownStack(MF), EltVT, 2002 commonAlignment(SmallestAlign, 2003 EltVT.getFixedSizeInBits() / 8)); 2004 2005 EVT LoVT, HiVT; 2006 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT); 2007 2008 // Load the Lo part from the stack slot. 2009 Lo = DAG.getLoad(LoVT, dl, Store, StackPtr, PtrInfo, SmallestAlign); 2010 2011 // Increment the pointer to the other part. 2012 auto Load = cast<LoadSDNode>(Lo); 2013 MachinePointerInfo MPI = Load->getPointerInfo(); 2014 IncrementPointer(Load, LoVT, MPI, StackPtr); 2015 2016 Hi = DAG.getLoad(HiVT, dl, Store, StackPtr, MPI, SmallestAlign); 2017 2018 // If we adjusted the original type, we need to truncate the results. 2019 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 2020 if (LoVT != Lo.getValueType()) 2021 Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Lo); 2022 if (HiVT != Hi.getValueType()) 2023 Hi = DAG.getNode(ISD::TRUNCATE, dl, HiVT, Hi); 2024 } 2025 2026 void DAGTypeLegalizer::SplitVecRes_STEP_VECTOR(SDNode *N, SDValue &Lo, 2027 SDValue &Hi) { 2028 EVT LoVT, HiVT; 2029 SDLoc dl(N); 2030 assert(N->getValueType(0).isScalableVector() && 2031 "Only scalable vectors are supported for STEP_VECTOR"); 2032 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 2033 SDValue Step = N->getOperand(0); 2034 2035 Lo = DAG.getNode(ISD::STEP_VECTOR, dl, LoVT, Step); 2036 2037 // Hi = Lo + (EltCnt * Step) 2038 EVT EltVT = Step.getValueType(); 2039 APInt StepVal = Step->getAsAPIntVal(); 2040 SDValue StartOfHi = 2041 DAG.getVScale(dl, EltVT, StepVal * LoVT.getVectorMinNumElements()); 2042 StartOfHi = DAG.getSExtOrTrunc(StartOfHi, dl, HiVT.getVectorElementType()); 2043 StartOfHi = DAG.getNode(ISD::SPLAT_VECTOR, dl, HiVT, StartOfHi); 2044 2045 Hi = DAG.getNode(ISD::STEP_VECTOR, dl, HiVT, Step); 2046 Hi = DAG.getNode(ISD::ADD, dl, HiVT, Hi, StartOfHi); 2047 } 2048 2049 void DAGTypeLegalizer::SplitVecRes_ScalarOp(SDNode *N, SDValue &Lo, 2050 SDValue &Hi) { 2051 EVT LoVT, HiVT; 2052 SDLoc dl(N); 2053 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 2054 Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0)); 2055 if (N->getOpcode() == ISD::SCALAR_TO_VECTOR) { 2056 Hi = DAG.getUNDEF(HiVT); 2057 } else { 2058 assert(N->getOpcode() == ISD::SPLAT_VECTOR && "Unexpected opcode"); 2059 Hi = Lo; 2060 } 2061 } 2062 2063 void DAGTypeLegalizer::SplitVecRes_VP_SPLAT(SDNode *N, SDValue &Lo, 2064 SDValue &Hi) { 2065 SDLoc dl(N); 2066 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); 2067 auto 
[MaskLo, MaskHi] = SplitMask(N->getOperand(1)); 2068 auto [EVLLo, EVLHi] = DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); 2069 Lo = DAG.getNode(N->getOpcode(), dl, LoVT, N->getOperand(0), MaskLo, EVLLo); 2070 Hi = DAG.getNode(N->getOpcode(), dl, HiVT, N->getOperand(0), MaskHi, EVLHi); 2071 } 2072 2073 void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, 2074 SDValue &Hi) { 2075 assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); 2076 EVT LoVT, HiVT; 2077 SDLoc dl(LD); 2078 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); 2079 2080 ISD::LoadExtType ExtType = LD->getExtensionType(); 2081 SDValue Ch = LD->getChain(); 2082 SDValue Ptr = LD->getBasePtr(); 2083 SDValue Offset = DAG.getUNDEF(Ptr.getValueType()); 2084 EVT MemoryVT = LD->getMemoryVT(); 2085 MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags(); 2086 AAMDNodes AAInfo = LD->getAAInfo(); 2087 2088 EVT LoMemVT, HiMemVT; 2089 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 2090 2091 if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) { 2092 SDValue Value, NewChain; 2093 std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); 2094 std::tie(Lo, Hi) = DAG.SplitVector(Value, dl); 2095 ReplaceValueWith(SDValue(LD, 1), NewChain); 2096 return; 2097 } 2098 2099 Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, 2100 LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(), 2101 MMOFlags, AAInfo); 2102 2103 MachinePointerInfo MPI; 2104 IncrementPointer(LD, LoMemVT, MPI, Ptr); 2105 2106 Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI, 2107 HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo); 2108 2109 // Build a factor node to remember that this load is independent of the 2110 // other one. 2111 Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), 2112 Hi.getValue(1)); 2113 2114 // Legalize the chain result - switch anything that used the old chain to 2115 // use the new one. 
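// (Illustrative note: the TokenFactor built above becomes the new chain, so
// ReplaceValueWith below redirects every user of the original load's chain
// result to it, keeping both half-loads ordered with respect to later memory
// operations.)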
2116 ReplaceValueWith(SDValue(LD, 1), Ch); 2117 } 2118 2119 void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, 2120 SDValue &Hi) { 2121 assert(LD->isUnindexed() && "Indexed VP load during type legalization!"); 2122 EVT LoVT, HiVT; 2123 SDLoc dl(LD); 2124 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); 2125 2126 ISD::LoadExtType ExtType = LD->getExtensionType(); 2127 SDValue Ch = LD->getChain(); 2128 SDValue Ptr = LD->getBasePtr(); 2129 SDValue Offset = LD->getOffset(); 2130 assert(Offset.isUndef() && "Unexpected indexed variable-length load offset"); 2131 Align Alignment = LD->getOriginalAlign(); 2132 SDValue Mask = LD->getMask(); 2133 SDValue EVL = LD->getVectorLength(); 2134 EVT MemoryVT = LD->getMemoryVT(); 2135 2136 EVT LoMemVT, HiMemVT; 2137 bool HiIsEmpty = false; 2138 std::tie(LoMemVT, HiMemVT) = 2139 DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty); 2140 2141 // Split Mask operand 2142 SDValue MaskLo, MaskHi; 2143 if (Mask.getOpcode() == ISD::SETCC) { 2144 SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); 2145 } else { 2146 if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) 2147 GetSplitVector(Mask, MaskLo, MaskHi); 2148 else 2149 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); 2150 } 2151 2152 // Split EVL operand 2153 SDValue EVLLo, EVLHi; 2154 std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl); 2155 2156 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 2157 LD->getPointerInfo(), MachineMemOperand::MOLoad, 2158 LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(), 2159 LD->getRanges()); 2160 2161 Lo = 2162 DAG.getLoadVP(LD->getAddressingMode(), ExtType, LoVT, dl, Ch, Ptr, Offset, 2163 MaskLo, EVLLo, LoMemVT, MMO, LD->isExpandingLoad()); 2164 2165 if (HiIsEmpty) { 2166 // The hi vp_load has zero storage size. We therefore simply set it to 2167 // the low vp_load and rely on subsequent removal from the chain. 2168 Hi = Lo; 2169 } else { 2170 // Generate hi vp_load. 2171 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, 2172 LD->isExpandingLoad()); 2173 2174 MachinePointerInfo MPI; 2175 if (LoMemVT.isScalableVector()) 2176 MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace()); 2177 else 2178 MPI = LD->getPointerInfo().getWithOffset( 2179 LoMemVT.getStoreSize().getFixedValue()); 2180 2181 MMO = DAG.getMachineFunction().getMachineMemOperand( 2182 MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), 2183 Alignment, LD->getAAInfo(), LD->getRanges()); 2184 2185 Hi = DAG.getLoadVP(LD->getAddressingMode(), ExtType, HiVT, dl, Ch, Ptr, 2186 Offset, MaskHi, EVLHi, HiMemVT, MMO, 2187 LD->isExpandingLoad()); 2188 } 2189 2190 // Build a factor node to remember that this load is independent of the 2191 // other one. 2192 Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), 2193 Hi.getValue(1)); 2194 2195 // Legalize the chain result - switch anything that used the old chain to 2196 // use the new one. 
2197 ReplaceValueWith(SDValue(LD, 1), Ch); 2198 } 2199 2200 void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, 2201 SDValue &Lo, SDValue &Hi) { 2202 assert(SLD->isUnindexed() && 2203 "Indexed VP strided load during type legalization!"); 2204 assert(SLD->getOffset().isUndef() && 2205 "Unexpected indexed variable-length load offset"); 2206 2207 SDLoc DL(SLD); 2208 2209 EVT LoVT, HiVT; 2210 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(SLD->getValueType(0)); 2211 2212 EVT LoMemVT, HiMemVT; 2213 bool HiIsEmpty = false; 2214 std::tie(LoMemVT, HiMemVT) = 2215 DAG.GetDependentSplitDestVTs(SLD->getMemoryVT(), LoVT, &HiIsEmpty); 2216 2217 SDValue Mask = SLD->getMask(); 2218 SDValue LoMask, HiMask; 2219 if (Mask.getOpcode() == ISD::SETCC) { 2220 SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask); 2221 } else { 2222 if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) 2223 GetSplitVector(Mask, LoMask, HiMask); 2224 else 2225 std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); 2226 } 2227 2228 SDValue LoEVL, HiEVL; 2229 std::tie(LoEVL, HiEVL) = 2230 DAG.SplitEVL(SLD->getVectorLength(), SLD->getValueType(0), DL); 2231 2232 // Generate the low vp_strided_load 2233 Lo = DAG.getStridedLoadVP( 2234 SLD->getAddressingMode(), SLD->getExtensionType(), LoVT, DL, 2235 SLD->getChain(), SLD->getBasePtr(), SLD->getOffset(), SLD->getStride(), 2236 LoMask, LoEVL, LoMemVT, SLD->getMemOperand(), SLD->isExpandingLoad()); 2237 2238 if (HiIsEmpty) { 2239 // The high vp_strided_load has zero storage size. We therefore simply set 2240 // it to the low vp_strided_load and rely on subsequent removal from the 2241 // chain. 2242 Hi = Lo; 2243 } else { 2244 // Generate the high vp_strided_load. 2245 // To calculate the high base address, we need to sum to the low base 2246 // address stride number of bytes for each element already loaded by low, 2247 // that is: Ptr = Ptr + (LoEVL * Stride) 2248 EVT PtrVT = SLD->getBasePtr().getValueType(); 2249 SDValue Increment = 2250 DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL, 2251 DAG.getSExtOrTrunc(SLD->getStride(), DL, PtrVT)); 2252 SDValue Ptr = 2253 DAG.getNode(ISD::ADD, DL, PtrVT, SLD->getBasePtr(), Increment); 2254 2255 Align Alignment = SLD->getOriginalAlign(); 2256 if (LoMemVT.isScalableVector()) 2257 Alignment = commonAlignment( 2258 Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8); 2259 2260 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 2261 MachinePointerInfo(SLD->getPointerInfo().getAddrSpace()), 2262 MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), 2263 Alignment, SLD->getAAInfo(), SLD->getRanges()); 2264 2265 Hi = DAG.getStridedLoadVP(SLD->getAddressingMode(), SLD->getExtensionType(), 2266 HiVT, DL, SLD->getChain(), Ptr, SLD->getOffset(), 2267 SLD->getStride(), HiMask, HiEVL, HiMemVT, MMO, 2268 SLD->isExpandingLoad()); 2269 } 2270 2271 // Build a factor node to remember that this load is independent of the 2272 // other one. 2273 SDValue Ch = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 2274 Hi.getValue(1)); 2275 2276 // Legalize the chain result - switch anything that used the old chain to 2277 // use the new one. 
2278 ReplaceValueWith(SDValue(SLD, 1), Ch); 2279 } 2280 2281 void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, 2282 SDValue &Lo, SDValue &Hi) { 2283 assert(MLD->isUnindexed() && "Indexed masked load during type legalization!"); 2284 EVT LoVT, HiVT; 2285 SDLoc dl(MLD); 2286 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 2287 2288 SDValue Ch = MLD->getChain(); 2289 SDValue Ptr = MLD->getBasePtr(); 2290 SDValue Offset = MLD->getOffset(); 2291 assert(Offset.isUndef() && "Unexpected indexed masked load offset"); 2292 SDValue Mask = MLD->getMask(); 2293 SDValue PassThru = MLD->getPassThru(); 2294 Align Alignment = MLD->getOriginalAlign(); 2295 ISD::LoadExtType ExtType = MLD->getExtensionType(); 2296 2297 // Split Mask operand 2298 SDValue MaskLo, MaskHi; 2299 if (Mask.getOpcode() == ISD::SETCC) { 2300 SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); 2301 } else { 2302 if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) 2303 GetSplitVector(Mask, MaskLo, MaskHi); 2304 else 2305 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); 2306 } 2307 2308 EVT MemoryVT = MLD->getMemoryVT(); 2309 EVT LoMemVT, HiMemVT; 2310 bool HiIsEmpty = false; 2311 std::tie(LoMemVT, HiMemVT) = 2312 DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty); 2313 2314 SDValue PassThruLo, PassThruHi; 2315 if (getTypeAction(PassThru.getValueType()) == TargetLowering::TypeSplitVector) 2316 GetSplitVector(PassThru, PassThruLo, PassThruHi); 2317 else 2318 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); 2319 2320 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 2321 MLD->getPointerInfo(), MachineMemOperand::MOLoad, 2322 LocationSize::beforeOrAfterPointer(), Alignment, MLD->getAAInfo(), 2323 MLD->getRanges()); 2324 2325 Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, Offset, MaskLo, PassThruLo, LoMemVT, 2326 MMO, MLD->getAddressingMode(), ExtType, 2327 MLD->isExpandingLoad()); 2328 2329 if (HiIsEmpty) { 2330 // The hi masked load has zero storage size. We therefore simply set it to 2331 // the low masked load and rely on subsequent removal from the chain. 2332 Hi = Lo; 2333 } else { 2334 // Generate hi masked load. 2335 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG, 2336 MLD->isExpandingLoad()); 2337 2338 MachinePointerInfo MPI; 2339 if (LoMemVT.isScalableVector()) 2340 MPI = MachinePointerInfo(MLD->getPointerInfo().getAddrSpace()); 2341 else 2342 MPI = MLD->getPointerInfo().getWithOffset( 2343 LoMemVT.getStoreSize().getFixedValue()); 2344 2345 MMO = DAG.getMachineFunction().getMachineMemOperand( 2346 MPI, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), 2347 Alignment, MLD->getAAInfo(), MLD->getRanges()); 2348 2349 Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, Offset, MaskHi, PassThruHi, 2350 HiMemVT, MMO, MLD->getAddressingMode(), ExtType, 2351 MLD->isExpandingLoad()); 2352 } 2353 2354 // Build a factor node to remember that this load is independent of the 2355 // other one. 2356 Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), 2357 Hi.getValue(1)); 2358 2359 // Legalize the chain result - switch anything that used the old chain to 2360 // use the new one. 
2361 ReplaceValueWith(SDValue(MLD, 1), Ch); 2362 2363 } 2364 2365 void DAGTypeLegalizer::SplitVecRes_Gather(MemSDNode *N, SDValue &Lo, 2366 SDValue &Hi, bool SplitSETCC) { 2367 EVT LoVT, HiVT; 2368 SDLoc dl(N); 2369 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 2370 2371 SDValue Ch = N->getChain(); 2372 SDValue Ptr = N->getBasePtr(); 2373 struct Operands { 2374 SDValue Mask; 2375 SDValue Index; 2376 SDValue Scale; 2377 } Ops = [&]() -> Operands { 2378 if (auto *MSC = dyn_cast<MaskedGatherSDNode>(N)) { 2379 return {MSC->getMask(), MSC->getIndex(), MSC->getScale()}; 2380 } 2381 auto *VPSC = cast<VPGatherSDNode>(N); 2382 return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale()}; 2383 }(); 2384 2385 EVT MemoryVT = N->getMemoryVT(); 2386 Align Alignment = N->getOriginalAlign(); 2387 2388 // Split Mask operand 2389 SDValue MaskLo, MaskHi; 2390 if (SplitSETCC && Ops.Mask.getOpcode() == ISD::SETCC) { 2391 SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi); 2392 } else { 2393 std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, dl); 2394 } 2395 2396 EVT LoMemVT, HiMemVT; 2397 // Split MemoryVT 2398 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 2399 2400 SDValue IndexHi, IndexLo; 2401 if (getTypeAction(Ops.Index.getValueType()) == 2402 TargetLowering::TypeSplitVector) 2403 GetSplitVector(Ops.Index, IndexLo, IndexHi); 2404 else 2405 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, dl); 2406 2407 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 2408 N->getPointerInfo(), MachineMemOperand::MOLoad, 2409 LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), 2410 N->getRanges()); 2411 2412 if (auto *MGT = dyn_cast<MaskedGatherSDNode>(N)) { 2413 SDValue PassThru = MGT->getPassThru(); 2414 SDValue PassThruLo, PassThruHi; 2415 if (getTypeAction(PassThru.getValueType()) == 2416 TargetLowering::TypeSplitVector) 2417 GetSplitVector(PassThru, PassThruLo, PassThruHi); 2418 else 2419 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, dl); 2420 2421 ISD::LoadExtType ExtType = MGT->getExtensionType(); 2422 ISD::MemIndexType IndexTy = MGT->getIndexType(); 2423 2424 SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Ops.Scale}; 2425 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, 2426 OpsLo, MMO, IndexTy, ExtType); 2427 2428 SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Ops.Scale}; 2429 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, 2430 OpsHi, MMO, IndexTy, ExtType); 2431 } else { 2432 auto *VPGT = cast<VPGatherSDNode>(N); 2433 SDValue EVLLo, EVLHi; 2434 std::tie(EVLLo, EVLHi) = 2435 DAG.SplitEVL(VPGT->getVectorLength(), MemoryVT, dl); 2436 2437 SDValue OpsLo[] = {Ch, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo}; 2438 Lo = DAG.getGatherVP(DAG.getVTList(LoVT, MVT::Other), LoMemVT, dl, OpsLo, 2439 MMO, VPGT->getIndexType()); 2440 2441 SDValue OpsHi[] = {Ch, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi}; 2442 Hi = DAG.getGatherVP(DAG.getVTList(HiVT, MVT::Other), HiMemVT, dl, OpsHi, 2443 MMO, VPGT->getIndexType()); 2444 } 2445 2446 // Build a factor node to remember that this load is independent of the 2447 // other one. 2448 Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), 2449 Hi.getValue(1)); 2450 2451 // Legalize the chain result - switch anything that used the old chain to 2452 // use the new one. 
2453 ReplaceValueWith(SDValue(N, 1), Ch); 2454 } 2455 2456 void DAGTypeLegalizer::SplitVecRes_VECTOR_COMPRESS(SDNode *N, SDValue &Lo, 2457 SDValue &Hi) { 2458 // This is not "trivial", as there is a dependency between the two subvectors. 2459 // Depending on the number of 1s in the mask, the elements from the Hi vector 2460 // need to be moved to the Lo vector. Passthru values make this even harder. 2461 // We try to use VECTOR_COMPRESS if the target has custom lowering with 2462 // smaller types and passthru is undef, as it is most likely faster than the 2463 // fully expand path. Otherwise, just do the full expansion as one "big" 2464 // operation and then extract the Lo and Hi vectors from that. This gets 2465 // rid of VECTOR_COMPRESS and all other operands can be legalized later. 2466 SDLoc DL(N); 2467 EVT VecVT = N->getValueType(0); 2468 2469 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(VecVT); 2470 bool HasCustomLowering = false; 2471 EVT CheckVT = LoVT; 2472 while (CheckVT.getVectorMinNumElements() > 1) { 2473 // TLI.isOperationLegalOrCustom requires a legal type, but we could have a 2474 // custom lowering for illegal types. So we do the checks separately. 2475 if (TLI.isOperationLegal(ISD::VECTOR_COMPRESS, CheckVT) || 2476 TLI.isOperationCustom(ISD::VECTOR_COMPRESS, CheckVT)) { 2477 HasCustomLowering = true; 2478 break; 2479 } 2480 CheckVT = CheckVT.getHalfNumVectorElementsVT(*DAG.getContext()); 2481 } 2482 2483 SDValue Passthru = N->getOperand(2); 2484 if (!HasCustomLowering) { 2485 SDValue Compressed = TLI.expandVECTOR_COMPRESS(N, DAG); 2486 std::tie(Lo, Hi) = DAG.SplitVector(Compressed, DL, LoVT, HiVT); 2487 return; 2488 } 2489 2490 // Try to VECTOR_COMPRESS smaller vectors and combine via a stack store+load. 2491 SDValue Mask = N->getOperand(1); 2492 SDValue LoMask, HiMask; 2493 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); 2494 std::tie(LoMask, HiMask) = SplitMask(Mask); 2495 2496 SDValue UndefPassthru = DAG.getUNDEF(LoVT); 2497 Lo = DAG.getNode(ISD::VECTOR_COMPRESS, DL, LoVT, Lo, LoMask, UndefPassthru); 2498 Hi = DAG.getNode(ISD::VECTOR_COMPRESS, DL, HiVT, Hi, HiMask, UndefPassthru); 2499 2500 SDValue StackPtr = DAG.CreateStackTemporary( 2501 VecVT.getStoreSize(), DAG.getReducedAlign(VecVT, /*UseABI=*/false)); 2502 MachineFunction &MF = DAG.getMachineFunction(); 2503 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack( 2504 MF, cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex()); 2505 2506 // We store LoVec and then insert HiVec starting at offset=|1s| in LoMask. 
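// Illustrative example (mask values assumed): if LoMask is <1, 0, 1, 0>, the
// VECREDUCE_ADD below evaluates to 2, so Hi's compressed elements are stored
// starting at element index 2 of the stack slot, immediately after Lo's two
// kept elements.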
2507 SDValue WideMask = 2508 DAG.getNode(ISD::ZERO_EXTEND, DL, LoMask.getValueType(), LoMask); 2509 SDValue Offset = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i32, WideMask); 2510 Offset = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Offset); 2511 2512 SDValue Chain = DAG.getEntryNode(); 2513 Chain = DAG.getStore(Chain, DL, Lo, StackPtr, PtrInfo); 2514 Chain = DAG.getStore(Chain, DL, Hi, Offset, 2515 MachinePointerInfo::getUnknownStack(MF)); 2516 2517 SDValue Compressed = DAG.getLoad(VecVT, DL, Chain, StackPtr, PtrInfo); 2518 if (!Passthru.isUndef()) { 2519 Compressed = 2520 DAG.getNode(ISD::VSELECT, DL, VecVT, Mask, Compressed, Passthru); 2521 } 2522 std::tie(Lo, Hi) = DAG.SplitVector(Compressed, DL); 2523 } 2524 2525 void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { 2526 assert(N->getValueType(0).isVector() && 2527 N->getOperand(0).getValueType().isVector() && 2528 "Operand types must be vectors"); 2529 2530 EVT LoVT, HiVT; 2531 SDLoc DL(N); 2532 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 2533 2534 // If the input also splits, handle it directly. Otherwise split it by hand. 2535 SDValue LL, LH, RL, RH; 2536 if (getTypeAction(N->getOperand(0).getValueType()) == 2537 TargetLowering::TypeSplitVector) 2538 GetSplitVector(N->getOperand(0), LL, LH); 2539 else 2540 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); 2541 2542 if (getTypeAction(N->getOperand(1).getValueType()) == 2543 TargetLowering::TypeSplitVector) 2544 GetSplitVector(N->getOperand(1), RL, RH); 2545 else 2546 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); 2547 2548 if (N->getOpcode() == ISD::SETCC) { 2549 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); 2550 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); 2551 } else { 2552 assert(N->getOpcode() == ISD::VP_SETCC && "Expected VP_SETCC opcode"); 2553 SDValue MaskLo, MaskHi, EVLLo, EVLHi; 2554 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); 2555 std::tie(EVLLo, EVLHi) = 2556 DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL); 2557 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2), MaskLo, 2558 EVLLo); 2559 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2), MaskHi, 2560 EVLHi); 2561 } 2562 } 2563 2564 void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, 2565 SDValue &Hi) { 2566 // Get the dest types - they may not match the input types, e.g. int_to_fp. 2567 EVT LoVT, HiVT; 2568 SDLoc dl(N); 2569 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 2570 2571 // If the input also splits, handle it directly for a compile time speedup. 2572 // Otherwise split it by hand. 
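// (Illustrative note: unary ops handled here may carry one extra non-vector
// operand, e.g. FP_ROUND's trunc flag, which is simply forwarded unchanged to
// both halves below.)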
2573 EVT InVT = N->getOperand(0).getValueType(); 2574 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) 2575 GetSplitVector(N->getOperand(0), Lo, Hi); 2576 else 2577 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); 2578 2579 const SDNodeFlags Flags = N->getFlags(); 2580 unsigned Opcode = N->getOpcode(); 2581 if (N->getNumOperands() <= 2) { 2582 if (Opcode == ISD::FP_ROUND) { 2583 Lo = DAG.getNode(Opcode, dl, LoVT, Lo, N->getOperand(1), Flags); 2584 Hi = DAG.getNode(Opcode, dl, HiVT, Hi, N->getOperand(1), Flags); 2585 } else { 2586 Lo = DAG.getNode(Opcode, dl, LoVT, Lo, Flags); 2587 Hi = DAG.getNode(Opcode, dl, HiVT, Hi, Flags); 2588 } 2589 return; 2590 } 2591 2592 assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); 2593 assert(N->isVPOpcode() && "Expected VP opcode"); 2594 2595 SDValue MaskLo, MaskHi; 2596 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); 2597 2598 SDValue EVLLo, EVLHi; 2599 std::tie(EVLLo, EVLHi) = 2600 DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); 2601 2602 Lo = DAG.getNode(Opcode, dl, LoVT, {Lo, MaskLo, EVLLo}, Flags); 2603 Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags); 2604 } 2605 2606 void DAGTypeLegalizer::SplitVecRes_ADDRSPACECAST(SDNode *N, SDValue &Lo, 2607 SDValue &Hi) { 2608 SDLoc dl(N); 2609 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); 2610 2611 // If the input also splits, handle it directly for a compile time speedup. 2612 // Otherwise split it by hand. 2613 EVT InVT = N->getOperand(0).getValueType(); 2614 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) 2615 GetSplitVector(N->getOperand(0), Lo, Hi); 2616 else 2617 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); 2618 2619 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N); 2620 unsigned SrcAS = AddrSpaceCastN->getSrcAddressSpace(); 2621 unsigned DestAS = AddrSpaceCastN->getDestAddressSpace(); 2622 Lo = DAG.getAddrSpaceCast(dl, LoVT, Lo, SrcAS, DestAS); 2623 Hi = DAG.getAddrSpaceCast(dl, HiVT, Hi, SrcAS, DestAS); 2624 } 2625 2626 void DAGTypeLegalizer::SplitVecRes_UnaryOpWithTwoResults(SDNode *N, 2627 unsigned ResNo, 2628 SDValue &Lo, 2629 SDValue &Hi) { 2630 SDLoc dl(N); 2631 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(N->getValueType(0)); 2632 auto [LoVT1, HiVT1] = DAG.GetSplitDestVTs(N->getValueType(1)); 2633 2634 // If the input also splits, handle it directly for a compile time speedup. 2635 // Otherwise split it by hand. 2636 EVT InVT = N->getOperand(0).getValueType(); 2637 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) 2638 GetSplitVector(N->getOperand(0), Lo, Hi); 2639 else 2640 std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); 2641 2642 Lo = DAG.getNode(N->getOpcode(), dl, {LoVT, LoVT1}, Lo); 2643 Hi = DAG.getNode(N->getOpcode(), dl, {HiVT, HiVT1}, Hi); 2644 Lo->setFlags(N->getFlags()); 2645 Hi->setFlags(N->getFlags()); 2646 2647 SDNode *HiNode = Hi.getNode(); 2648 SDNode *LoNode = Lo.getNode(); 2649 2650 // Replace the other vector result not being explicitly split here. 
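// (Illustrative note: FSINCOS and FFREXP produce two results; the halves of
// the result being split are returned in Lo/Hi, while the node's other result
// is either recorded as a split vector or rebuilt with CONCAT_VECTORS below.)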
2651 unsigned OtherNo = 1 - ResNo; 2652 EVT OtherVT = N->getValueType(OtherNo); 2653 if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) { 2654 SetSplitVector(SDValue(N, OtherNo), SDValue(LoNode, OtherNo), 2655 SDValue(HiNode, OtherNo)); 2656 } else { 2657 SDValue OtherVal = 2658 DAG.getNode(ISD::CONCAT_VECTORS, dl, OtherVT, SDValue(LoNode, OtherNo), 2659 SDValue(HiNode, OtherNo)); 2660 ReplaceValueWith(SDValue(N, OtherNo), OtherVal); 2661 } 2662 } 2663 2664 void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, 2665 SDValue &Hi) { 2666 SDLoc dl(N); 2667 EVT SrcVT = N->getOperand(0).getValueType(); 2668 EVT DestVT = N->getValueType(0); 2669 EVT LoVT, HiVT; 2670 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); 2671 2672 // We can do better than a generic split operation if the extend is doing 2673 // more than just doubling the width of the elements and the following are 2674 // true: 2675 // - The number of vector elements is even, 2676 // - the source type is legal, 2677 // - the type of a split source is illegal, 2678 // - the type of an extended (by doubling element size) source is legal, and 2679 // - the type of that extended source when split is legal. 2680 // 2681 // This won't necessarily completely legalize the operation, but it will 2682 // more effectively move in the right direction and prevent falling down 2683 // to scalarization in many cases due to the input vector being split too 2684 // far. 2685 if (SrcVT.getVectorElementCount().isKnownEven() && 2686 SrcVT.getScalarSizeInBits() * 2 < DestVT.getScalarSizeInBits()) { 2687 LLVMContext &Ctx = *DAG.getContext(); 2688 EVT NewSrcVT = SrcVT.widenIntegerVectorElementType(Ctx); 2689 EVT SplitSrcVT = SrcVT.getHalfNumVectorElementsVT(Ctx); 2690 2691 EVT SplitLoVT, SplitHiVT; 2692 std::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); 2693 if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && 2694 TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { 2695 LLVM_DEBUG(dbgs() << "Split vector extend via incremental extend:"; 2696 N->dump(&DAG); dbgs() << "\n"); 2697 if (!N->isVPOpcode()) { 2698 // Extend the source vector by one step. 2699 SDValue NewSrc = 2700 DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); 2701 // Get the low and high halves of the new, extended one step, vector. 2702 std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); 2703 // Extend those vector halves the rest of the way. 2704 Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); 2705 Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); 2706 return; 2707 } 2708 2709 // Extend the source vector by one step. 2710 SDValue NewSrc = 2711 DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0), 2712 N->getOperand(1), N->getOperand(2)); 2713 // Get the low and high halves of the new, extended one step, vector. 2714 std::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); 2715 2716 SDValue MaskLo, MaskHi; 2717 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); 2718 2719 SDValue EVLLo, EVLHi; 2720 std::tie(EVLLo, EVLHi) = 2721 DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); 2722 // Extend those vector halves the rest of the way. 2723 Lo = DAG.getNode(N->getOpcode(), dl, LoVT, {Lo, MaskLo, EVLLo}); 2724 Hi = DAG.getNode(N->getOpcode(), dl, HiVT, {Hi, MaskHi, EVLHi}); 2725 return; 2726 } 2727 } 2728 // Fall back to the generic unary operator splitting otherwise. 
2729 SplitVecRes_UnaryOp(N, Lo, Hi); 2730 } 2731 2732 void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, 2733 SDValue &Lo, SDValue &Hi) { 2734 // The low and high parts of the original input give four input vectors. 2735 SDValue Inputs[4]; 2736 SDLoc DL(N); 2737 GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); 2738 GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); 2739 EVT NewVT = Inputs[0].getValueType(); 2740 unsigned NewElts = NewVT.getVectorNumElements(); 2741 2742 auto &&IsConstant = [](const SDValue &N) { 2743 APInt SplatValue; 2744 return N.getResNo() == 0 && 2745 (ISD::isConstantSplatVector(N.getNode(), SplatValue) || 2746 ISD::isBuildVectorOfConstantSDNodes(N.getNode())); 2747 }; 2748 auto &&BuildVector = [NewElts, &DAG = DAG, NewVT, &DL](SDValue &Input1, 2749 SDValue &Input2, 2750 ArrayRef<int> Mask) { 2751 assert(Input1->getOpcode() == ISD::BUILD_VECTOR && 2752 Input2->getOpcode() == ISD::BUILD_VECTOR && 2753 "Expected build vector node."); 2754 EVT EltVT = NewVT.getVectorElementType(); 2755 SmallVector<SDValue> Ops(NewElts, DAG.getUNDEF(EltVT)); 2756 for (unsigned I = 0; I < NewElts; ++I) { 2757 if (Mask[I] == PoisonMaskElem) 2758 continue; 2759 unsigned Idx = Mask[I]; 2760 if (Idx >= NewElts) 2761 Ops[I] = Input2.getOperand(Idx - NewElts); 2762 else 2763 Ops[I] = Input1.getOperand(Idx); 2764 // Make the type of all elements the same as the element type. 2765 if (Ops[I].getValueType().bitsGT(EltVT)) 2766 Ops[I] = DAG.getNode(ISD::TRUNCATE, DL, EltVT, Ops[I]); 2767 } 2768 return DAG.getBuildVector(NewVT, DL, Ops); 2769 }; 2770 2771 // If Lo or Hi uses elements from at most two of the four input vectors, then 2772 // express it as a vector shuffle of those two inputs. Otherwise extract the 2773 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR. 2774 SmallVector<int> OrigMask(N->getMask()); 2775 // Try to pack incoming shuffles/inputs. 2776 auto &&TryPeekThroughShufflesInputs = [&Inputs, &NewVT, this, NewElts, 2777 &DL](SmallVectorImpl<int> &Mask) { 2778 // Check if all inputs are shuffles of the same operands or non-shuffles. 2779 MapVector<std::pair<SDValue, SDValue>, SmallVector<unsigned>> ShufflesIdxs; 2780 for (unsigned Idx = 0; Idx < std::size(Inputs); ++Idx) { 2781 SDValue Input = Inputs[Idx]; 2782 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Input.getNode()); 2783 if (!Shuffle || 2784 Input.getOperand(0).getValueType() != Input.getValueType()) 2785 continue; 2786 ShufflesIdxs[std::make_pair(Input.getOperand(0), Input.getOperand(1))] 2787 .push_back(Idx); 2788 ShufflesIdxs[std::make_pair(Input.getOperand(1), Input.getOperand(0))] 2789 .push_back(Idx); 2790 } 2791 for (auto &P : ShufflesIdxs) { 2792 if (P.second.size() < 2) 2793 continue; 2794 // Use shuffles operands instead of shuffles themselves. 2795 // 1. Adjust mask. 2796 for (int &Idx : Mask) { 2797 if (Idx == PoisonMaskElem) 2798 continue; 2799 unsigned SrcRegIdx = Idx / NewElts; 2800 if (Inputs[SrcRegIdx].isUndef()) { 2801 Idx = PoisonMaskElem; 2802 continue; 2803 } 2804 auto *Shuffle = 2805 dyn_cast<ShuffleVectorSDNode>(Inputs[SrcRegIdx].getNode()); 2806 if (!Shuffle || !is_contained(P.second, SrcRegIdx)) 2807 continue; 2808 int MaskElt = Shuffle->getMaskElt(Idx % NewElts); 2809 if (MaskElt == PoisonMaskElem) { 2810 Idx = PoisonMaskElem; 2811 continue; 2812 } 2813 Idx = MaskElt % NewElts + 2814 P.second[Shuffle->getOperand(MaskElt / NewElts) == P.first.first 2815 ? 0 2816 : 1] * 2817 NewElts; 2818 } 2819 // 2. Update inputs. 
2820 Inputs[P.second[0]] = P.first.first; 2821 Inputs[P.second[1]] = P.first.second; 2822 // Clear the pair data. 2823 P.second.clear(); 2824 ShufflesIdxs[std::make_pair(P.first.second, P.first.first)].clear(); 2825 } 2826 // Check if any concat_vectors can be simplified. 2827 SmallBitVector UsedSubVector(2 * std::size(Inputs)); 2828 for (int &Idx : Mask) { 2829 if (Idx == PoisonMaskElem) 2830 continue; 2831 unsigned SrcRegIdx = Idx / NewElts; 2832 if (Inputs[SrcRegIdx].isUndef()) { 2833 Idx = PoisonMaskElem; 2834 continue; 2835 } 2836 TargetLowering::LegalizeTypeAction TypeAction = 2837 getTypeAction(Inputs[SrcRegIdx].getValueType()); 2838 if (Inputs[SrcRegIdx].getOpcode() == ISD::CONCAT_VECTORS && 2839 Inputs[SrcRegIdx].getNumOperands() == 2 && 2840 !Inputs[SrcRegIdx].getOperand(1).isUndef() && 2841 (TypeAction == TargetLowering::TypeLegal || 2842 TypeAction == TargetLowering::TypeWidenVector)) 2843 UsedSubVector.set(2 * SrcRegIdx + (Idx % NewElts) / (NewElts / 2)); 2844 } 2845 if (UsedSubVector.count() > 1) { 2846 SmallVector<SmallVector<std::pair<unsigned, int>, 2>> Pairs; 2847 for (unsigned I = 0; I < std::size(Inputs); ++I) { 2848 if (UsedSubVector.test(2 * I) == UsedSubVector.test(2 * I + 1)) 2849 continue; 2850 if (Pairs.empty() || Pairs.back().size() == 2) 2851 Pairs.emplace_back(); 2852 if (UsedSubVector.test(2 * I)) { 2853 Pairs.back().emplace_back(I, 0); 2854 } else { 2855 assert(UsedSubVector.test(2 * I + 1) && 2856 "Expected to be used one of the subvectors."); 2857 Pairs.back().emplace_back(I, 1); 2858 } 2859 } 2860 if (!Pairs.empty() && Pairs.front().size() > 1) { 2861 // Adjust mask. 2862 for (int &Idx : Mask) { 2863 if (Idx == PoisonMaskElem) 2864 continue; 2865 unsigned SrcRegIdx = Idx / NewElts; 2866 auto *It = find_if( 2867 Pairs, [SrcRegIdx](ArrayRef<std::pair<unsigned, int>> Idxs) { 2868 return Idxs.front().first == SrcRegIdx || 2869 Idxs.back().first == SrcRegIdx; 2870 }); 2871 if (It == Pairs.end()) 2872 continue; 2873 Idx = It->front().first * NewElts + (Idx % NewElts) % (NewElts / 2) + 2874 (SrcRegIdx == It->front().first ? 0 : (NewElts / 2)); 2875 } 2876 // Adjust inputs. 2877 for (ArrayRef<std::pair<unsigned, int>> Idxs : Pairs) { 2878 Inputs[Idxs.front().first] = DAG.getNode( 2879 ISD::CONCAT_VECTORS, DL, 2880 Inputs[Idxs.front().first].getValueType(), 2881 Inputs[Idxs.front().first].getOperand(Idxs.front().second), 2882 Inputs[Idxs.back().first].getOperand(Idxs.back().second)); 2883 } 2884 } 2885 } 2886 bool Changed; 2887 do { 2888 // Try to remove extra shuffles (except broadcasts) and shuffles with the 2889 // reused operands. 2890 Changed = false; 2891 for (unsigned I = 0; I < std::size(Inputs); ++I) { 2892 auto *Shuffle = dyn_cast<ShuffleVectorSDNode>(Inputs[I].getNode()); 2893 if (!Shuffle) 2894 continue; 2895 if (Shuffle->getOperand(0).getValueType() != NewVT) 2896 continue; 2897 int Op = -1; 2898 if (!Inputs[I].hasOneUse() && Shuffle->getOperand(1).isUndef() && 2899 !Shuffle->isSplat()) { 2900 Op = 0; 2901 } else if (!Inputs[I].hasOneUse() && 2902 !Shuffle->getOperand(1).isUndef()) { 2903 // Find the only used operand, if possible. 
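// (Illustrative note: if every mask lane that reads from this shuffle selects
// the same shuffle operand, Op is left pointing at that operand so the
// shuffle can be bypassed and replaced by it further below.)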
2904 for (int &Idx : Mask) { 2905 if (Idx == PoisonMaskElem) 2906 continue; 2907 unsigned SrcRegIdx = Idx / NewElts; 2908 if (SrcRegIdx != I) 2909 continue; 2910 int MaskElt = Shuffle->getMaskElt(Idx % NewElts); 2911 if (MaskElt == PoisonMaskElem) { 2912 Idx = PoisonMaskElem; 2913 continue; 2914 } 2915 int OpIdx = MaskElt / NewElts; 2916 if (Op == -1) { 2917 Op = OpIdx; 2918 continue; 2919 } 2920 if (Op != OpIdx) { 2921 Op = -1; 2922 break; 2923 } 2924 } 2925 } 2926 if (Op < 0) { 2927 // Try to check if one of the shuffle operands is used already. 2928 for (int OpIdx = 0; OpIdx < 2; ++OpIdx) { 2929 if (Shuffle->getOperand(OpIdx).isUndef()) 2930 continue; 2931 auto *It = find(Inputs, Shuffle->getOperand(OpIdx)); 2932 if (It == std::end(Inputs)) 2933 continue; 2934 int FoundOp = std::distance(std::begin(Inputs), It); 2935 // Found that operand is used already. 2936 // 1. Fix the mask for the reused operand. 2937 for (int &Idx : Mask) { 2938 if (Idx == PoisonMaskElem) 2939 continue; 2940 unsigned SrcRegIdx = Idx / NewElts; 2941 if (SrcRegIdx != I) 2942 continue; 2943 int MaskElt = Shuffle->getMaskElt(Idx % NewElts); 2944 if (MaskElt == PoisonMaskElem) { 2945 Idx = PoisonMaskElem; 2946 continue; 2947 } 2948 int MaskIdx = MaskElt / NewElts; 2949 if (OpIdx == MaskIdx) 2950 Idx = MaskElt % NewElts + FoundOp * NewElts; 2951 } 2952 // 2. Set Op to the unused OpIdx. 2953 Op = (OpIdx + 1) % 2; 2954 break; 2955 } 2956 } 2957 if (Op >= 0) { 2958 Changed = true; 2959 Inputs[I] = Shuffle->getOperand(Op); 2960 // Adjust mask. 2961 for (int &Idx : Mask) { 2962 if (Idx == PoisonMaskElem) 2963 continue; 2964 unsigned SrcRegIdx = Idx / NewElts; 2965 if (SrcRegIdx != I) 2966 continue; 2967 int MaskElt = Shuffle->getMaskElt(Idx % NewElts); 2968 int OpIdx = MaskElt / NewElts; 2969 if (OpIdx != Op) 2970 continue; 2971 Idx = MaskElt % NewElts + SrcRegIdx * NewElts; 2972 } 2973 } 2974 } 2975 } while (Changed); 2976 }; 2977 TryPeekThroughShufflesInputs(OrigMask); 2978 // Proces unique inputs. 2979 auto &&MakeUniqueInputs = [&Inputs, &IsConstant, 2980 NewElts](SmallVectorImpl<int> &Mask) { 2981 SetVector<SDValue> UniqueInputs; 2982 SetVector<SDValue> UniqueConstantInputs; 2983 for (const auto &I : Inputs) { 2984 if (IsConstant(I)) 2985 UniqueConstantInputs.insert(I); 2986 else if (!I.isUndef()) 2987 UniqueInputs.insert(I); 2988 } 2989 // Adjust mask in case of reused inputs. Also, need to insert constant 2990 // inputs at first, otherwise it affects the final outcome. 
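// (Illustrative example, values assumed: for Inputs = {A, B, A, C} where C is
// a constant BUILD_VECTOR, C is placed first, followed by A and B, and the
// non-poison mask indices are remapped into the new register order.)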
2991 if (UniqueInputs.size() != std::size(Inputs)) { 2992 auto &&UniqueVec = UniqueInputs.takeVector(); 2993 auto &&UniqueConstantVec = UniqueConstantInputs.takeVector(); 2994 unsigned ConstNum = UniqueConstantVec.size(); 2995 for (int &Idx : Mask) { 2996 if (Idx == PoisonMaskElem) 2997 continue; 2998 unsigned SrcRegIdx = Idx / NewElts; 2999 if (Inputs[SrcRegIdx].isUndef()) { 3000 Idx = PoisonMaskElem; 3001 continue; 3002 } 3003 const auto It = find(UniqueConstantVec, Inputs[SrcRegIdx]); 3004 if (It != UniqueConstantVec.end()) { 3005 Idx = (Idx % NewElts) + 3006 NewElts * std::distance(UniqueConstantVec.begin(), It); 3007 assert(Idx >= 0 && "Expected defined mask idx."); 3008 continue; 3009 } 3010 const auto RegIt = find(UniqueVec, Inputs[SrcRegIdx]); 3011 assert(RegIt != UniqueVec.end() && "Cannot find non-const value."); 3012 Idx = (Idx % NewElts) + 3013 NewElts * (std::distance(UniqueVec.begin(), RegIt) + ConstNum); 3014 assert(Idx >= 0 && "Expected defined mask idx."); 3015 } 3016 copy(UniqueConstantVec, std::begin(Inputs)); 3017 copy(UniqueVec, std::next(std::begin(Inputs), ConstNum)); 3018 } 3019 }; 3020 MakeUniqueInputs(OrigMask); 3021 SDValue OrigInputs[4]; 3022 copy(Inputs, std::begin(OrigInputs)); 3023 for (unsigned High = 0; High < 2; ++High) { 3024 SDValue &Output = High ? Hi : Lo; 3025 3026 // Build a shuffle mask for the output, discovering on the fly which 3027 // input vectors to use as shuffle operands. 3028 unsigned FirstMaskIdx = High * NewElts; 3029 SmallVector<int> Mask(NewElts * std::size(Inputs), PoisonMaskElem); 3030 copy(ArrayRef(OrigMask).slice(FirstMaskIdx, NewElts), Mask.begin()); 3031 assert(!Output && "Expected default initialized initial value."); 3032 TryPeekThroughShufflesInputs(Mask); 3033 MakeUniqueInputs(Mask); 3034 SDValue TmpInputs[4]; 3035 copy(Inputs, std::begin(TmpInputs)); 3036 // Track changes in the output registers. 
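// (Illustrative note: AccumulateResults below returns true once the same
// destination register index is seen a second time, signalling that Output
// already holds a partially built shuffle that the next shuffle must combine
// with, rather than the original TmpInputs.)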
3037 int UsedIdx = -1; 3038 bool SecondIteration = false; 3039 auto &&AccumulateResults = [&UsedIdx, &SecondIteration](unsigned Idx) { 3040 if (UsedIdx < 0) { 3041 UsedIdx = Idx; 3042 return false; 3043 } 3044 if (UsedIdx >= 0 && static_cast<unsigned>(UsedIdx) == Idx) 3045 SecondIteration = true; 3046 return SecondIteration; 3047 }; 3048 processShuffleMasks( 3049 Mask, std::size(Inputs), std::size(Inputs), 3050 /*NumOfUsedRegs=*/1, 3051 [&Output, &DAG = DAG, NewVT]() { Output = DAG.getUNDEF(NewVT); }, 3052 [&Output, &DAG = DAG, NewVT, &DL, &Inputs, 3053 &BuildVector](ArrayRef<int> Mask, unsigned Idx, unsigned /*Unused*/) { 3054 if (Inputs[Idx]->getOpcode() == ISD::BUILD_VECTOR) 3055 Output = BuildVector(Inputs[Idx], Inputs[Idx], Mask); 3056 else 3057 Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx], 3058 DAG.getUNDEF(NewVT), Mask); 3059 Inputs[Idx] = Output; 3060 }, 3061 [&AccumulateResults, &Output, &DAG = DAG, NewVT, &DL, &Inputs, 3062 &TmpInputs, &BuildVector](ArrayRef<int> Mask, unsigned Idx1, 3063 unsigned Idx2, bool /*Unused*/) { 3064 if (AccumulateResults(Idx1)) { 3065 if (Inputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR && 3066 Inputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR) 3067 Output = BuildVector(Inputs[Idx1], Inputs[Idx2], Mask); 3068 else 3069 Output = DAG.getVectorShuffle(NewVT, DL, Inputs[Idx1], 3070 Inputs[Idx2], Mask); 3071 } else { 3072 if (TmpInputs[Idx1]->getOpcode() == ISD::BUILD_VECTOR && 3073 TmpInputs[Idx2]->getOpcode() == ISD::BUILD_VECTOR) 3074 Output = BuildVector(TmpInputs[Idx1], TmpInputs[Idx2], Mask); 3075 else 3076 Output = DAG.getVectorShuffle(NewVT, DL, TmpInputs[Idx1], 3077 TmpInputs[Idx2], Mask); 3078 } 3079 Inputs[Idx1] = Output; 3080 }); 3081 copy(OrigInputs, std::begin(Inputs)); 3082 } 3083 } 3084 3085 void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { 3086 EVT OVT = N->getValueType(0); 3087 EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext()); 3088 SDValue Chain = N->getOperand(0); 3089 SDValue Ptr = N->getOperand(1); 3090 SDValue SV = N->getOperand(2); 3091 SDLoc dl(N); 3092 3093 const Align Alignment = 3094 DAG.getDataLayout().getABITypeAlign(NVT.getTypeForEVT(*DAG.getContext())); 3095 3096 Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment.value()); 3097 Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment.value()); 3098 Chain = Hi.getValue(1); 3099 3100 // Modified the chain - switch anything that used the old chain to use 3101 // the new one. 
3102 ReplaceValueWith(SDValue(N, 1), Chain); 3103 } 3104 3105 void DAGTypeLegalizer::SplitVecRes_FP_TO_XINT_SAT(SDNode *N, SDValue &Lo, 3106 SDValue &Hi) { 3107 EVT DstVTLo, DstVTHi; 3108 std::tie(DstVTLo, DstVTHi) = DAG.GetSplitDestVTs(N->getValueType(0)); 3109 SDLoc dl(N); 3110 3111 SDValue SrcLo, SrcHi; 3112 EVT SrcVT = N->getOperand(0).getValueType(); 3113 if (getTypeAction(SrcVT) == TargetLowering::TypeSplitVector) 3114 GetSplitVector(N->getOperand(0), SrcLo, SrcHi); 3115 else 3116 std::tie(SrcLo, SrcHi) = DAG.SplitVectorOperand(N, 0); 3117 3118 Lo = DAG.getNode(N->getOpcode(), dl, DstVTLo, SrcLo, N->getOperand(1)); 3119 Hi = DAG.getNode(N->getOpcode(), dl, DstVTHi, SrcHi, N->getOperand(1)); 3120 } 3121 3122 void DAGTypeLegalizer::SplitVecRes_VECTOR_REVERSE(SDNode *N, SDValue &Lo, 3123 SDValue &Hi) { 3124 SDValue InLo, InHi; 3125 GetSplitVector(N->getOperand(0), InLo, InHi); 3126 SDLoc DL(N); 3127 3128 Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, InHi.getValueType(), InHi); 3129 Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, InLo.getValueType(), InLo); 3130 } 3131 3132 void DAGTypeLegalizer::SplitVecRes_VECTOR_SPLICE(SDNode *N, SDValue &Lo, 3133 SDValue &Hi) { 3134 SDLoc DL(N); 3135 3136 SDValue Expanded = TLI.expandVectorSplice(N, DAG); 3137 std::tie(Lo, Hi) = DAG.SplitVector(Expanded, DL); 3138 } 3139 3140 void DAGTypeLegalizer::SplitVecRes_VP_REVERSE(SDNode *N, SDValue &Lo, 3141 SDValue &Hi) { 3142 EVT VT = N->getValueType(0); 3143 SDValue Val = N->getOperand(0); 3144 SDValue Mask = N->getOperand(1); 3145 SDValue EVL = N->getOperand(2); 3146 SDLoc DL(N); 3147 3148 // Fallback to VP_STRIDED_STORE to stack followed by VP_LOAD. 3149 Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false); 3150 3151 EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 3152 VT.getVectorElementCount()); 3153 SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment); 3154 EVT PtrVT = StackPtr.getValueType(); 3155 auto &MF = DAG.getMachineFunction(); 3156 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); 3157 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); 3158 3159 MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand( 3160 PtrInfo, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), 3161 Alignment); 3162 MachineMemOperand *LoadMMO = DAG.getMachineFunction().getMachineMemOperand( 3163 PtrInfo, MachineMemOperand::MOLoad, LocationSize::beforeOrAfterPointer(), 3164 Alignment); 3165 3166 unsigned EltWidth = VT.getScalarSizeInBits() / 8; 3167 SDValue NumElemMinus1 = 3168 DAG.getNode(ISD::SUB, DL, PtrVT, DAG.getZExtOrTrunc(EVL, DL, PtrVT), 3169 DAG.getConstant(1, DL, PtrVT)); 3170 SDValue StartOffset = DAG.getNode(ISD::MUL, DL, PtrVT, NumElemMinus1, 3171 DAG.getConstant(EltWidth, DL, PtrVT)); 3172 SDValue StorePtr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, StartOffset); 3173 SDValue Stride = DAG.getConstant(-(int64_t)EltWidth, DL, PtrVT); 3174 3175 SDValue TrueMask = DAG.getBoolConstant(true, DL, Mask.getValueType(), VT); 3176 SDValue Store = DAG.getStridedStoreVP(DAG.getEntryNode(), DL, Val, StorePtr, 3177 DAG.getUNDEF(PtrVT), Stride, TrueMask, 3178 EVL, MemVT, StoreMMO, ISD::UNINDEXED); 3179 3180 SDValue Load = DAG.getLoadVP(VT, DL, Store, StackPtr, Mask, EVL, LoadMMO); 3181 3182 std::tie(Lo, Hi) = DAG.SplitVector(Load, DL); 3183 } 3184 3185 void DAGTypeLegalizer::SplitVecRes_VECTOR_DEINTERLEAVE(SDNode *N) { 3186 3187 SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi; 3188 GetSplitVector(N->getOperand(0), 
Op0Lo, Op0Hi); 3189 GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); 3190 EVT VT = Op0Lo.getValueType(); 3191 SDLoc DL(N); 3192 SDValue ResLo = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 3193 DAG.getVTList(VT, VT), Op0Lo, Op0Hi); 3194 SDValue ResHi = DAG.getNode(ISD::VECTOR_DEINTERLEAVE, DL, 3195 DAG.getVTList(VT, VT), Op1Lo, Op1Hi); 3196 3197 SetSplitVector(SDValue(N, 0), ResLo.getValue(0), ResHi.getValue(0)); 3198 SetSplitVector(SDValue(N, 1), ResLo.getValue(1), ResHi.getValue(1)); 3199 } 3200 3201 void DAGTypeLegalizer::SplitVecRes_VECTOR_INTERLEAVE(SDNode *N) { 3202 SDValue Op0Lo, Op0Hi, Op1Lo, Op1Hi; 3203 GetSplitVector(N->getOperand(0), Op0Lo, Op0Hi); 3204 GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); 3205 EVT VT = Op0Lo.getValueType(); 3206 SDLoc DL(N); 3207 SDValue Res[] = {DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, 3208 DAG.getVTList(VT, VT), Op0Lo, Op1Lo), 3209 DAG.getNode(ISD::VECTOR_INTERLEAVE, DL, 3210 DAG.getVTList(VT, VT), Op0Hi, Op1Hi)}; 3211 3212 SetSplitVector(SDValue(N, 0), Res[0].getValue(0), Res[0].getValue(1)); 3213 SetSplitVector(SDValue(N, 1), Res[1].getValue(0), Res[1].getValue(1)); 3214 } 3215 3216 //===----------------------------------------------------------------------===// 3217 // Operand Vector Splitting 3218 //===----------------------------------------------------------------------===// 3219 3220 /// This method is called when the specified operand of the specified node is 3221 /// found to need vector splitting. At this point, all of the result types of 3222 /// the node are known to be legal, but other operands of the node may need 3223 /// legalization as well as the specified one. 3224 bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { 3225 LLVM_DEBUG(dbgs() << "Split node operand: "; N->dump(&DAG)); 3226 SDValue Res = SDValue(); 3227 3228 // See if the target wants to custom split this node. 
3229 if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) 3230 return false; 3231 3232 switch (N->getOpcode()) { 3233 default: 3234 #ifndef NDEBUG 3235 dbgs() << "SplitVectorOperand Op #" << OpNo << ": "; 3236 N->dump(&DAG); 3237 dbgs() << "\n"; 3238 #endif 3239 report_fatal_error("Do not know how to split this operator's " 3240 "operand!\n"); 3241 3242 case ISD::VP_SETCC: 3243 case ISD::STRICT_FSETCC: 3244 case ISD::STRICT_FSETCCS: 3245 case ISD::SETCC: Res = SplitVecOp_VSETCC(N); break; 3246 case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break; 3247 case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break; 3248 case ISD::INSERT_SUBVECTOR: Res = SplitVecOp_INSERT_SUBVECTOR(N, OpNo); break; 3249 case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break; 3250 case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break; 3251 case ISD::VP_TRUNCATE: 3252 case ISD::TRUNCATE: 3253 Res = SplitVecOp_TruncateHelper(N); 3254 break; 3255 case ISD::STRICT_FP_ROUND: 3256 case ISD::VP_FP_ROUND: 3257 case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break; 3258 case ISD::FCOPYSIGN: Res = SplitVecOp_FPOpDifferentTypes(N); break; 3259 case ISD::STORE: 3260 Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo); 3261 break; 3262 case ISD::VP_STORE: 3263 Res = SplitVecOp_VP_STORE(cast<VPStoreSDNode>(N), OpNo); 3264 break; 3265 case ISD::EXPERIMENTAL_VP_STRIDED_STORE: 3266 Res = SplitVecOp_VP_STRIDED_STORE(cast<VPStridedStoreSDNode>(N), OpNo); 3267 break; 3268 case ISD::MSTORE: 3269 Res = SplitVecOp_MSTORE(cast<MaskedStoreSDNode>(N), OpNo); 3270 break; 3271 case ISD::MSCATTER: 3272 case ISD::VP_SCATTER: 3273 Res = SplitVecOp_Scatter(cast<MemSDNode>(N), OpNo); 3274 break; 3275 case ISD::MGATHER: 3276 case ISD::VP_GATHER: 3277 Res = SplitVecOp_Gather(cast<MemSDNode>(N), OpNo); 3278 break; 3279 case ISD::VSELECT: 3280 Res = SplitVecOp_VSELECT(N, OpNo); 3281 break; 3282 case ISD::VECTOR_COMPRESS: 3283 Res = SplitVecOp_VECTOR_COMPRESS(N, OpNo); 3284 break; 3285 case ISD::STRICT_SINT_TO_FP: 3286 case ISD::STRICT_UINT_TO_FP: 3287 case ISD::SINT_TO_FP: 3288 case ISD::UINT_TO_FP: 3289 case ISD::VP_SINT_TO_FP: 3290 case ISD::VP_UINT_TO_FP: 3291 if (N->getValueType(0).bitsLT( 3292 N->getOperand(N->isStrictFPOpcode() ? 
1 : 0).getValueType())) 3293 Res = SplitVecOp_TruncateHelper(N); 3294 else 3295 Res = SplitVecOp_UnaryOp(N); 3296 break; 3297 case ISD::FP_TO_SINT_SAT: 3298 case ISD::FP_TO_UINT_SAT: 3299 Res = SplitVecOp_FP_TO_XINT_SAT(N); 3300 break; 3301 case ISD::FP_TO_SINT: 3302 case ISD::FP_TO_UINT: 3303 case ISD::VP_FP_TO_SINT: 3304 case ISD::VP_FP_TO_UINT: 3305 case ISD::STRICT_FP_TO_SINT: 3306 case ISD::STRICT_FP_TO_UINT: 3307 case ISD::STRICT_FP_EXTEND: 3308 case ISD::FP_EXTEND: 3309 case ISD::SIGN_EXTEND: 3310 case ISD::ZERO_EXTEND: 3311 case ISD::ANY_EXTEND: 3312 case ISD::FTRUNC: 3313 case ISD::LROUND: 3314 case ISD::LLROUND: 3315 case ISD::LRINT: 3316 case ISD::LLRINT: 3317 Res = SplitVecOp_UnaryOp(N); 3318 break; 3319 case ISD::FLDEXP: 3320 Res = SplitVecOp_FPOpDifferentTypes(N); 3321 break; 3322 3323 case ISD::SCMP: 3324 case ISD::UCMP: 3325 Res = SplitVecOp_CMP(N); 3326 break; 3327 3328 case ISD::FAKE_USE: 3329 Res = SplitVecOp_FAKE_USE(N); 3330 break; 3331 case ISD::ANY_EXTEND_VECTOR_INREG: 3332 case ISD::SIGN_EXTEND_VECTOR_INREG: 3333 case ISD::ZERO_EXTEND_VECTOR_INREG: 3334 Res = SplitVecOp_ExtVecInRegOp(N); 3335 break; 3336 3337 case ISD::VECREDUCE_FADD: 3338 case ISD::VECREDUCE_FMUL: 3339 case ISD::VECREDUCE_ADD: 3340 case ISD::VECREDUCE_MUL: 3341 case ISD::VECREDUCE_AND: 3342 case ISD::VECREDUCE_OR: 3343 case ISD::VECREDUCE_XOR: 3344 case ISD::VECREDUCE_SMAX: 3345 case ISD::VECREDUCE_SMIN: 3346 case ISD::VECREDUCE_UMAX: 3347 case ISD::VECREDUCE_UMIN: 3348 case ISD::VECREDUCE_FMAX: 3349 case ISD::VECREDUCE_FMIN: 3350 case ISD::VECREDUCE_FMAXIMUM: 3351 case ISD::VECREDUCE_FMINIMUM: 3352 Res = SplitVecOp_VECREDUCE(N, OpNo); 3353 break; 3354 case ISD::VECREDUCE_SEQ_FADD: 3355 case ISD::VECREDUCE_SEQ_FMUL: 3356 Res = SplitVecOp_VECREDUCE_SEQ(N); 3357 break; 3358 case ISD::VP_REDUCE_FADD: 3359 case ISD::VP_REDUCE_SEQ_FADD: 3360 case ISD::VP_REDUCE_FMUL: 3361 case ISD::VP_REDUCE_SEQ_FMUL: 3362 case ISD::VP_REDUCE_ADD: 3363 case ISD::VP_REDUCE_MUL: 3364 case ISD::VP_REDUCE_AND: 3365 case ISD::VP_REDUCE_OR: 3366 case ISD::VP_REDUCE_XOR: 3367 case ISD::VP_REDUCE_SMAX: 3368 case ISD::VP_REDUCE_SMIN: 3369 case ISD::VP_REDUCE_UMAX: 3370 case ISD::VP_REDUCE_UMIN: 3371 case ISD::VP_REDUCE_FMAX: 3372 case ISD::VP_REDUCE_FMIN: 3373 case ISD::VP_REDUCE_FMAXIMUM: 3374 case ISD::VP_REDUCE_FMINIMUM: 3375 Res = SplitVecOp_VP_REDUCE(N, OpNo); 3376 break; 3377 case ISD::VP_CTTZ_ELTS: 3378 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: 3379 Res = SplitVecOp_VP_CttzElements(N); 3380 break; 3381 case ISD::EXPERIMENTAL_VECTOR_HISTOGRAM: 3382 Res = SplitVecOp_VECTOR_HISTOGRAM(N); 3383 break; 3384 } 3385 3386 // If the result is null, the sub-method took care of registering results etc. 3387 if (!Res.getNode()) return false; 3388 3389 // If the result is N, the sub-method updated N in place. Tell the legalizer 3390 // core about this. 3391 if (Res.getNode() == N) 3392 return true; 3393 3394 if (N->isStrictFPOpcode()) 3395 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 && 3396 "Invalid operand expansion"); 3397 else 3398 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && 3399 "Invalid operand expansion"); 3400 3401 ReplaceValueWith(SDValue(N, 0), Res); 3402 return false; 3403 } 3404 3405 SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { 3406 // The only possibility for an illegal operand is the mask, since result type 3407 // legalization would have handled this node already otherwise. 
3408 assert(OpNo == 0 && "Illegal operand must be mask"); 3409 3410 SDValue Mask = N->getOperand(0); 3411 SDValue Src0 = N->getOperand(1); 3412 SDValue Src1 = N->getOperand(2); 3413 EVT Src0VT = Src0.getValueType(); 3414 SDLoc DL(N); 3415 assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); 3416 3417 SDValue Lo, Hi; 3418 GetSplitVector(N->getOperand(0), Lo, Hi); 3419 assert(Lo.getValueType() == Hi.getValueType() && 3420 "Lo and Hi have differing types"); 3421 3422 EVT LoOpVT, HiOpVT; 3423 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); 3424 assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); 3425 3426 SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; 3427 std::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); 3428 std::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); 3429 std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); 3430 3431 SDValue LoSelect = 3432 DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); 3433 SDValue HiSelect = 3434 DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1); 3435 3436 return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); 3437 } 3438 3439 SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_COMPRESS(SDNode *N, unsigned OpNo) { 3440 // The only possibility for an illegal operand is the mask, since result type 3441 // legalization would have handled this node already otherwise. 3442 assert(OpNo == 1 && "Illegal operand must be mask"); 3443 3444 // To split the mask, we need to split the result type too, so we can just 3445 // reuse that logic here. 3446 SDValue Lo, Hi; 3447 SplitVecRes_VECTOR_COMPRESS(N, Lo, Hi); 3448 3449 EVT VecVT = N->getValueType(0); 3450 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VecVT, Lo, Hi); 3451 } 3452 3453 SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE(SDNode *N, unsigned OpNo) { 3454 EVT ResVT = N->getValueType(0); 3455 SDValue Lo, Hi; 3456 SDLoc dl(N); 3457 3458 SDValue VecOp = N->getOperand(OpNo); 3459 EVT VecVT = VecOp.getValueType(); 3460 assert(VecVT.isVector() && "Can only split reduce vector operand"); 3461 GetSplitVector(VecOp, Lo, Hi); 3462 EVT LoOpVT, HiOpVT; 3463 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT); 3464 3465 // Use the appropriate scalar instruction on the split subvectors before 3466 // reducing the now partially reduced smaller vector. 3467 unsigned CombineOpc = ISD::getVecReduceBaseOpcode(N->getOpcode()); 3468 SDValue Partial = DAG.getNode(CombineOpc, dl, LoOpVT, Lo, Hi, N->getFlags()); 3469 return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, N->getFlags()); 3470 } 3471 3472 SDValue DAGTypeLegalizer::SplitVecOp_VECREDUCE_SEQ(SDNode *N) { 3473 EVT ResVT = N->getValueType(0); 3474 SDValue Lo, Hi; 3475 SDLoc dl(N); 3476 3477 SDValue AccOp = N->getOperand(0); 3478 SDValue VecOp = N->getOperand(1); 3479 SDNodeFlags Flags = N->getFlags(); 3480 3481 EVT VecVT = VecOp.getValueType(); 3482 assert(VecVT.isVector() && "Can only split reduce vector operand"); 3483 GetSplitVector(VecOp, Lo, Hi); 3484 EVT LoOpVT, HiOpVT; 3485 std::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(VecVT); 3486 3487 // Reduce low half. 3488 SDValue Partial = DAG.getNode(N->getOpcode(), dl, ResVT, AccOp, Lo, Flags); 3489 3490 // Reduce high half, using low half result as initial value. 
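  // For example (illustrative): splitting vecreduce_seq_fadd(%acc, <4 x f32> %v)
  // into <2 x f32> halves %lo and %hi yields
  // vecreduce_seq_fadd(vecreduce_seq_fadd(%acc, %lo), %hi), preserving the
  // required sequential (in-order) evaluation.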
3491 return DAG.getNode(N->getOpcode(), dl, ResVT, Partial, Hi, Flags); 3492 } 3493 3494 SDValue DAGTypeLegalizer::SplitVecOp_VP_REDUCE(SDNode *N, unsigned OpNo) { 3495 assert(N->isVPOpcode() && "Expected VP opcode"); 3496 assert(OpNo == 1 && "Can only split reduce vector operand"); 3497 3498 unsigned Opc = N->getOpcode(); 3499 EVT ResVT = N->getValueType(0); 3500 SDValue Lo, Hi; 3501 SDLoc dl(N); 3502 3503 SDValue VecOp = N->getOperand(OpNo); 3504 EVT VecVT = VecOp.getValueType(); 3505 assert(VecVT.isVector() && "Can only split reduce vector operand"); 3506 GetSplitVector(VecOp, Lo, Hi); 3507 3508 SDValue MaskLo, MaskHi; 3509 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(2)); 3510 3511 SDValue EVLLo, EVLHi; 3512 std::tie(EVLLo, EVLHi) = DAG.SplitEVL(N->getOperand(3), VecVT, dl); 3513 3514 const SDNodeFlags Flags = N->getFlags(); 3515 3516 SDValue ResLo = 3517 DAG.getNode(Opc, dl, ResVT, {N->getOperand(0), Lo, MaskLo, EVLLo}, Flags); 3518 return DAG.getNode(Opc, dl, ResVT, {ResLo, Hi, MaskHi, EVLHi}, Flags); 3519 } 3520 3521 SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { 3522 // The result has a legal vector type, but the input needs splitting. 3523 EVT ResVT = N->getValueType(0); 3524 SDValue Lo, Hi; 3525 SDLoc dl(N); 3526 GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi); 3527 EVT InVT = Lo.getValueType(); 3528 3529 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), 3530 InVT.getVectorElementCount()); 3531 3532 if (N->isStrictFPOpcode()) { 3533 Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, 3534 { N->getOperand(0), Lo }); 3535 Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other }, 3536 { N->getOperand(0), Hi }); 3537 3538 // Build a factor node to remember that this operation is independent 3539 // of the other one. 3540 SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), 3541 Hi.getValue(1)); 3542 3543 // Legalize the chain result - switch anything that used the old chain to 3544 // use the new one. 3545 ReplaceValueWith(SDValue(N, 1), Ch); 3546 } else if (N->getNumOperands() == 3) { 3547 assert(N->isVPOpcode() && "Expected VP opcode"); 3548 SDValue MaskLo, MaskHi, EVLLo, EVLHi; 3549 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); 3550 std::tie(EVLLo, EVLHi) = 3551 DAG.SplitEVL(N->getOperand(2), N->getValueType(0), dl); 3552 Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo, MaskLo, EVLLo); 3553 Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi, MaskHi, EVLHi); 3554 } else { 3555 Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo); 3556 Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi); 3557 } 3558 3559 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); 3560 } 3561 3562 // Split a FAKE_USE use of a vector into FAKE_USEs of hi and lo part. 3563 SDValue DAGTypeLegalizer::SplitVecOp_FAKE_USE(SDNode *N) { 3564 SDValue Lo, Hi; 3565 GetSplitVector(N->getOperand(1), Lo, Hi); 3566 SDValue Chain = 3567 DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), Lo); 3568 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, Chain, Hi); 3569 } 3570 3571 SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { 3572 // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will 3573 // end up being split all the way down to individual components. Convert the 3574 // split pieces into integers and reassemble. 
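  // Illustrative trace for the non-scalable path below: the already-split
  // v2i16 halves are each bitconverted to an i32, JoinIntegers combines them
  // into an i64 (swapping the halves first on big-endian targets), and a
  // final BITCAST produces the requested result type.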
3575 EVT ResVT = N->getValueType(0); 3576 SDValue Lo, Hi; 3577 GetSplitVector(N->getOperand(0), Lo, Hi); 3578 SDLoc dl(N); 3579 3580 if (ResVT.isScalableVector()) { 3581 auto [LoVT, HiVT] = DAG.GetSplitDestVTs(ResVT); 3582 Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo); 3583 Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi); 3584 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); 3585 } 3586 3587 Lo = BitConvertToInteger(Lo); 3588 Hi = BitConvertToInteger(Hi); 3589 3590 if (DAG.getDataLayout().isBigEndian()) 3591 std::swap(Lo, Hi); 3592 3593 return DAG.getNode(ISD::BITCAST, dl, ResVT, JoinIntegers(Lo, Hi)); 3594 } 3595 3596 SDValue DAGTypeLegalizer::SplitVecOp_INSERT_SUBVECTOR(SDNode *N, 3597 unsigned OpNo) { 3598 assert(OpNo == 1 && "Invalid OpNo; can only split SubVec."); 3599 // We know that the result type is legal. 3600 EVT ResVT = N->getValueType(0); 3601 3602 SDValue Vec = N->getOperand(0); 3603 SDValue SubVec = N->getOperand(1); 3604 SDValue Idx = N->getOperand(2); 3605 SDLoc dl(N); 3606 3607 SDValue Lo, Hi; 3608 GetSplitVector(SubVec, Lo, Hi); 3609 3610 uint64_t IdxVal = Idx->getAsZExtVal(); 3611 uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); 3612 3613 SDValue FirstInsertion = 3614 DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Lo, Idx); 3615 SDValue SecondInsertion = 3616 DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, FirstInsertion, Hi, 3617 DAG.getVectorIdxConstant(IdxVal + LoElts, dl)); 3618 3619 return SecondInsertion; 3620 } 3621 3622 SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { 3623 // We know that the extracted result type is legal. 3624 EVT SubVT = N->getValueType(0); 3625 SDValue Idx = N->getOperand(1); 3626 SDLoc dl(N); 3627 SDValue Lo, Hi; 3628 3629 GetSplitVector(N->getOperand(0), Lo, Hi); 3630 3631 uint64_t LoEltsMin = Lo.getValueType().getVectorMinNumElements(); 3632 uint64_t IdxVal = Idx->getAsZExtVal(); 3633 3634 if (IdxVal < LoEltsMin) { 3635 assert(IdxVal + SubVT.getVectorMinNumElements() <= LoEltsMin && 3636 "Extracted subvector crosses vector split!"); 3637 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Lo, Idx); 3638 } else if (SubVT.isScalableVector() == 3639 N->getOperand(0).getValueType().isScalableVector()) 3640 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVT, Hi, 3641 DAG.getVectorIdxConstant(IdxVal - LoEltsMin, dl)); 3642 3643 // After this point the DAG node only permits extracting fixed-width 3644 // subvectors from scalable vectors. 3645 assert(SubVT.isFixedLengthVector() && 3646 "Extracting scalable subvector from fixed-width unsupported"); 3647 3648 // If the element type is i1 and we're not promoting the result, then we may 3649 // end up loading the wrong data since the bits are packed tightly into 3650 // bytes. For example, if we extract a v4i1 (legal) from a nxv4i1 (legal) 3651 // type at index 4, then we will load a byte starting at index 0. 3652 if (SubVT.getScalarType() == MVT::i1) 3653 report_fatal_error("Don't know how to extract fixed-width predicate " 3654 "subvector from a scalable predicate vector"); 3655 3656 // Spill the vector to the stack. We should use the alignment for 3657 // the smallest part. 
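  // E.g. (illustrative) extracting a v4i32 at index 4 from an nxv8i32 source:
  // the whole nxv8i32 is spilled to a stack temporary, getVectorSubVecPointer
  // computes the address of element 4 within the slot, and a plain v4i32 load
  // reads the subvector back.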
3658 SDValue Vec = N->getOperand(0); 3659 EVT VecVT = Vec.getValueType(); 3660 Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); 3661 SDValue StackPtr = 3662 DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); 3663 auto &MF = DAG.getMachineFunction(); 3664 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); 3665 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); 3666 3667 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, 3668 SmallestAlign); 3669 3670 // Extract the subvector by loading the correct part. 3671 StackPtr = TLI.getVectorSubVecPointer(DAG, StackPtr, VecVT, SubVT, Idx); 3672 3673 return DAG.getLoad( 3674 SubVT, dl, Store, StackPtr, 3675 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction())); 3676 } 3677 3678 SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { 3679 SDValue Vec = N->getOperand(0); 3680 SDValue Idx = N->getOperand(1); 3681 EVT VecVT = Vec.getValueType(); 3682 3683 if (const ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Idx)) { 3684 uint64_t IdxVal = Index->getZExtValue(); 3685 3686 SDValue Lo, Hi; 3687 GetSplitVector(Vec, Lo, Hi); 3688 3689 uint64_t LoElts = Lo.getValueType().getVectorMinNumElements(); 3690 3691 if (IdxVal < LoElts) 3692 return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); 3693 else if (!Vec.getValueType().isScalableVector()) 3694 return SDValue(DAG.UpdateNodeOperands(N, Hi, 3695 DAG.getConstant(IdxVal - LoElts, SDLoc(N), 3696 Idx.getValueType())), 0); 3697 } 3698 3699 // See if the target wants to custom expand this node. 3700 if (CustomLowerNode(N, N->getValueType(0), true)) 3701 return SDValue(); 3702 3703 // Make the vector elements byte-addressable if they aren't already. 3704 SDLoc dl(N); 3705 EVT EltVT = VecVT.getVectorElementType(); 3706 if (!EltVT.isByteSized()) { 3707 EltVT = EltVT.changeTypeToInteger().getRoundIntegerType(*DAG.getContext()); 3708 VecVT = VecVT.changeElementType(EltVT); 3709 Vec = DAG.getNode(ISD::ANY_EXTEND, dl, VecVT, Vec); 3710 SDValue NewExtract = 3711 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Vec, Idx); 3712 return DAG.getAnyExtOrTrunc(NewExtract, dl, N->getValueType(0)); 3713 } 3714 3715 // Store the vector to the stack. 3716 // In cases where the vector is illegal it will be broken down into parts 3717 // and stored in parts - we should use the alignment for the smallest part. 3718 Align SmallestAlign = DAG.getReducedAlign(VecVT, /*UseABI=*/false); 3719 SDValue StackPtr = 3720 DAG.CreateStackTemporary(VecVT.getStoreSize(), SmallestAlign); 3721 auto &MF = DAG.getMachineFunction(); 3722 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); 3723 auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex); 3724 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo, 3725 SmallestAlign); 3726 3727 // Load back the required element. 3728 StackPtr = TLI.getVectorElementPointer(DAG, StackPtr, VecVT, Idx); 3729 3730 // EXTRACT_VECTOR_ELT can extend the element type to the width of the return 3731 // type, leaving the high bits undefined. But it can't truncate. 
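  // For instance, extracting an i8 element into an i32 result is fine (the
  // EXTLOAD below leaves the upper 24 bits undefined), but extracting an i32
  // element into an i16 result is not, which the assert below rules out.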
3732 assert(N->getValueType(0).bitsGE(EltVT) && "Illegal EXTRACT_VECTOR_ELT."); 3733 3734 return DAG.getExtLoad( 3735 ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, 3736 MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()), EltVT, 3737 commonAlignment(SmallestAlign, EltVT.getFixedSizeInBits() / 8)); 3738 } 3739 3740 SDValue DAGTypeLegalizer::SplitVecOp_ExtVecInRegOp(SDNode *N) { 3741 SDValue Lo, Hi; 3742 3743 // *_EXTEND_VECTOR_INREG only reference the lower half of the input, so 3744 // splitting the result has the same effect as splitting the input operand. 3745 SplitVecRes_ExtVecInRegOp(N, Lo, Hi); 3746 3747 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), N->getValueType(0), Lo, Hi); 3748 } 3749 3750 SDValue DAGTypeLegalizer::SplitVecOp_Gather(MemSDNode *N, unsigned OpNo) { 3751 (void)OpNo; 3752 SDValue Lo, Hi; 3753 SplitVecRes_Gather(N, Lo, Hi); 3754 3755 SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, N, N->getValueType(0), Lo, Hi); 3756 ReplaceValueWith(SDValue(N, 0), Res); 3757 return SDValue(); 3758 } 3759 3760 SDValue DAGTypeLegalizer::SplitVecOp_VP_STORE(VPStoreSDNode *N, unsigned OpNo) { 3761 assert(N->isUnindexed() && "Indexed vp_store of vector?"); 3762 SDValue Ch = N->getChain(); 3763 SDValue Ptr = N->getBasePtr(); 3764 SDValue Offset = N->getOffset(); 3765 assert(Offset.isUndef() && "Unexpected VP store offset"); 3766 SDValue Mask = N->getMask(); 3767 SDValue EVL = N->getVectorLength(); 3768 SDValue Data = N->getValue(); 3769 Align Alignment = N->getOriginalAlign(); 3770 SDLoc DL(N); 3771 3772 SDValue DataLo, DataHi; 3773 if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) 3774 // Split Data operand 3775 GetSplitVector(Data, DataLo, DataHi); 3776 else 3777 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 3778 3779 // Split Mask operand 3780 SDValue MaskLo, MaskHi; 3781 if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) { 3782 SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); 3783 } else { 3784 if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) 3785 GetSplitVector(Mask, MaskLo, MaskHi); 3786 else 3787 std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL); 3788 } 3789 3790 EVT MemoryVT = N->getMemoryVT(); 3791 EVT LoMemVT, HiMemVT; 3792 bool HiIsEmpty = false; 3793 std::tie(LoMemVT, HiMemVT) = 3794 DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty); 3795 3796 // Split EVL 3797 SDValue EVLLo, EVLHi; 3798 std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, Data.getValueType(), DL); 3799 3800 SDValue Lo, Hi; 3801 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 3802 N->getPointerInfo(), MachineMemOperand::MOStore, 3803 LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), 3804 N->getRanges()); 3805 3806 Lo = DAG.getStoreVP(Ch, DL, DataLo, Ptr, Offset, MaskLo, EVLLo, LoMemVT, MMO, 3807 N->getAddressingMode(), N->isTruncatingStore(), 3808 N->isCompressingStore()); 3809 3810 // If the hi vp_store has zero storage size, only the lo vp_store is needed. 
3811 if (HiIsEmpty) 3812 return Lo; 3813 3814 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, 3815 N->isCompressingStore()); 3816 3817 MachinePointerInfo MPI; 3818 if (LoMemVT.isScalableVector()) { 3819 Alignment = commonAlignment(Alignment, 3820 LoMemVT.getSizeInBits().getKnownMinValue() / 8); 3821 MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); 3822 } else 3823 MPI = N->getPointerInfo().getWithOffset( 3824 LoMemVT.getStoreSize().getFixedValue()); 3825 3826 MMO = DAG.getMachineFunction().getMachineMemOperand( 3827 MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), 3828 Alignment, N->getAAInfo(), N->getRanges()); 3829 3830 Hi = DAG.getStoreVP(Ch, DL, DataHi, Ptr, Offset, MaskHi, EVLHi, HiMemVT, MMO, 3831 N->getAddressingMode(), N->isTruncatingStore(), 3832 N->isCompressingStore()); 3833 3834 // Build a factor node to remember that this store is independent of the 3835 // other one. 3836 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 3837 } 3838 3839 SDValue DAGTypeLegalizer::SplitVecOp_VP_STRIDED_STORE(VPStridedStoreSDNode *N, 3840 unsigned OpNo) { 3841 assert(N->isUnindexed() && "Indexed vp_strided_store of a vector?"); 3842 assert(N->getOffset().isUndef() && "Unexpected VP strided store offset"); 3843 3844 SDLoc DL(N); 3845 3846 SDValue Data = N->getValue(); 3847 SDValue LoData, HiData; 3848 if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector) 3849 GetSplitVector(Data, LoData, HiData); 3850 else 3851 std::tie(LoData, HiData) = DAG.SplitVector(Data, DL); 3852 3853 EVT LoMemVT, HiMemVT; 3854 bool HiIsEmpty = false; 3855 std::tie(LoMemVT, HiMemVT) = DAG.GetDependentSplitDestVTs( 3856 N->getMemoryVT(), LoData.getValueType(), &HiIsEmpty); 3857 3858 SDValue Mask = N->getMask(); 3859 SDValue LoMask, HiMask; 3860 if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) 3861 SplitVecRes_SETCC(Mask.getNode(), LoMask, HiMask); 3862 else if (getTypeAction(Mask.getValueType()) == 3863 TargetLowering::TypeSplitVector) 3864 GetSplitVector(Mask, LoMask, HiMask); 3865 else 3866 std::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); 3867 3868 SDValue LoEVL, HiEVL; 3869 std::tie(LoEVL, HiEVL) = 3870 DAG.SplitEVL(N->getVectorLength(), Data.getValueType(), DL); 3871 3872 // Generate the low vp_strided_store 3873 SDValue Lo = DAG.getStridedStoreVP( 3874 N->getChain(), DL, LoData, N->getBasePtr(), N->getOffset(), 3875 N->getStride(), LoMask, LoEVL, LoMemVT, N->getMemOperand(), 3876 N->getAddressingMode(), N->isTruncatingStore(), N->isCompressingStore()); 3877 3878 // If the high vp_strided_store has zero storage size, only the low 3879 // vp_strided_store is needed. 3880 if (HiIsEmpty) 3881 return Lo; 3882 3883 // Generate the high vp_strided_store. 
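  // Illustrative example: with a byte stride of 8 and LoEVL == 5, the low
  // half wrote elements at Ptr, Ptr+8, ..., Ptr+32, so the high half must
  // begin at Ptr + 40.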
  // To compute the high base address, advance the low base address by Stride
  // bytes for each element already stored by the low half, that is:
  //   Ptr = Ptr + (LoEVL * Stride)
  EVT PtrVT = N->getBasePtr().getValueType();
  SDValue Increment =
      DAG.getNode(ISD::MUL, DL, PtrVT, LoEVL,
                  DAG.getSExtOrTrunc(N->getStride(), DL, PtrVT));
  SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, N->getBasePtr(), Increment);

  Align Alignment = N->getOriginalAlign();
  if (LoMemVT.isScalableVector())
    Alignment = commonAlignment(Alignment,
                                LoMemVT.getSizeInBits().getKnownMinValue() / 8);

  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      MachinePointerInfo(N->getPointerInfo().getAddrSpace()),
      MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(),
      Alignment, N->getAAInfo(), N->getRanges());

  SDValue Hi = DAG.getStridedStoreVP(
      N->getChain(), DL, HiData, Ptr, N->getOffset(), N->getStride(), HiMask,
      HiEVL, HiMemVT, MMO, N->getAddressingMode(), N->isTruncatingStore(),
      N->isCompressingStore());

  // Build a factor node to remember that this store is independent of the
  // other one.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
}

SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
                                            unsigned OpNo) {
  assert(N->isUnindexed() && "Indexed masked store of vector?");
  SDValue Ch = N->getChain();
  SDValue Ptr = N->getBasePtr();
  SDValue Offset = N->getOffset();
  assert(Offset.isUndef() && "Unexpected indexed masked store offset");
  SDValue Mask = N->getMask();
  SDValue Data = N->getValue();
  Align Alignment = N->getOriginalAlign();
  SDLoc DL(N);

  SDValue DataLo, DataHi;
  if (getTypeAction(Data.getValueType()) == TargetLowering::TypeSplitVector)
    // Split Data operand
    GetSplitVector(Data, DataLo, DataHi);
  else
    std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);

  // Split Mask operand
  SDValue MaskLo, MaskHi;
  if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
    SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
  } else {
    if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
      GetSplitVector(Mask, MaskLo, MaskHi);
    else
      std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
  }

  EVT MemoryVT = N->getMemoryVT();
  EVT LoMemVT, HiMemVT;
  bool HiIsEmpty = false;
  std::tie(LoMemVT, HiMemVT) =
      DAG.GetDependentSplitDestVTs(MemoryVT, DataLo.getValueType(), &HiIsEmpty);

  SDValue Lo, Hi, Res;
  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
      N->getPointerInfo(), MachineMemOperand::MOStore,
      LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(),
      N->getRanges());

  Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, Offset, MaskLo, LoMemVT, MMO,
                          N->getAddressingMode(), N->isTruncatingStore(),
                          N->isCompressingStore());

  if (HiIsEmpty) {
    // The hi masked store has zero storage size.
    // Only the lo masked store is needed.
3962 Res = Lo; 3963 } else { 3964 3965 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG, 3966 N->isCompressingStore()); 3967 3968 MachinePointerInfo MPI; 3969 if (LoMemVT.isScalableVector()) { 3970 Alignment = commonAlignment( 3971 Alignment, LoMemVT.getSizeInBits().getKnownMinValue() / 8); 3972 MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace()); 3973 } else 3974 MPI = N->getPointerInfo().getWithOffset( 3975 LoMemVT.getStoreSize().getFixedValue()); 3976 3977 MMO = DAG.getMachineFunction().getMachineMemOperand( 3978 MPI, MachineMemOperand::MOStore, LocationSize::beforeOrAfterPointer(), 3979 Alignment, N->getAAInfo(), N->getRanges()); 3980 3981 Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, Offset, MaskHi, HiMemVT, MMO, 3982 N->getAddressingMode(), N->isTruncatingStore(), 3983 N->isCompressingStore()); 3984 3985 // Build a factor node to remember that this store is independent of the 3986 // other one. 3987 Res = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 3988 } 3989 3990 return Res; 3991 } 3992 3993 SDValue DAGTypeLegalizer::SplitVecOp_Scatter(MemSDNode *N, unsigned OpNo) { 3994 SDValue Ch = N->getChain(); 3995 SDValue Ptr = N->getBasePtr(); 3996 EVT MemoryVT = N->getMemoryVT(); 3997 Align Alignment = N->getOriginalAlign(); 3998 SDLoc DL(N); 3999 struct Operands { 4000 SDValue Mask; 4001 SDValue Index; 4002 SDValue Scale; 4003 SDValue Data; 4004 } Ops = [&]() -> Operands { 4005 if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) { 4006 return {MSC->getMask(), MSC->getIndex(), MSC->getScale(), 4007 MSC->getValue()}; 4008 } 4009 auto *VPSC = cast<VPScatterSDNode>(N); 4010 return {VPSC->getMask(), VPSC->getIndex(), VPSC->getScale(), 4011 VPSC->getValue()}; 4012 }(); 4013 // Split all operands 4014 4015 EVT LoMemVT, HiMemVT; 4016 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 4017 4018 SDValue DataLo, DataHi; 4019 if (getTypeAction(Ops.Data.getValueType()) == TargetLowering::TypeSplitVector) 4020 // Split Data operand 4021 GetSplitVector(Ops.Data, DataLo, DataHi); 4022 else 4023 std::tie(DataLo, DataHi) = DAG.SplitVector(Ops.Data, DL); 4024 4025 // Split Mask operand 4026 SDValue MaskLo, MaskHi; 4027 if (OpNo == 1 && Ops.Mask.getOpcode() == ISD::SETCC) { 4028 SplitVecRes_SETCC(Ops.Mask.getNode(), MaskLo, MaskHi); 4029 } else { 4030 std::tie(MaskLo, MaskHi) = SplitMask(Ops.Mask, DL); 4031 } 4032 4033 SDValue IndexHi, IndexLo; 4034 if (getTypeAction(Ops.Index.getValueType()) == 4035 TargetLowering::TypeSplitVector) 4036 GetSplitVector(Ops.Index, IndexLo, IndexHi); 4037 else 4038 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Ops.Index, DL); 4039 4040 SDValue Lo; 4041 MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( 4042 N->getPointerInfo(), MachineMemOperand::MOStore, 4043 LocationSize::beforeOrAfterPointer(), Alignment, N->getAAInfo(), 4044 N->getRanges()); 4045 4046 if (auto *MSC = dyn_cast<MaskedScatterSDNode>(N)) { 4047 SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Ops.Scale}; 4048 Lo = 4049 DAG.getMaskedScatter(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO, 4050 MSC->getIndexType(), MSC->isTruncatingStore()); 4051 4052 // The order of the Scatter operation after split is well defined. The "Hi" 4053 // part comes after the "Lo". So these two operations should be chained one 4054 // after another. 
4055 SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Ops.Scale}; 4056 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, 4057 MMO, MSC->getIndexType(), 4058 MSC->isTruncatingStore()); 4059 } 4060 auto *VPSC = cast<VPScatterSDNode>(N); 4061 SDValue EVLLo, EVLHi; 4062 std::tie(EVLLo, EVLHi) = 4063 DAG.SplitEVL(VPSC->getVectorLength(), Ops.Data.getValueType(), DL); 4064 4065 SDValue OpsLo[] = {Ch, DataLo, Ptr, IndexLo, Ops.Scale, MaskLo, EVLLo}; 4066 Lo = DAG.getScatterVP(DAG.getVTList(MVT::Other), LoMemVT, DL, OpsLo, MMO, 4067 VPSC->getIndexType()); 4068 4069 // The order of the Scatter operation after split is well defined. The "Hi" 4070 // part comes after the "Lo". So these two operations should be chained one 4071 // after another. 4072 SDValue OpsHi[] = {Lo, DataHi, Ptr, IndexHi, Ops.Scale, MaskHi, EVLHi}; 4073 return DAG.getScatterVP(DAG.getVTList(MVT::Other), HiMemVT, DL, OpsHi, MMO, 4074 VPSC->getIndexType()); 4075 } 4076 4077 SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { 4078 assert(N->isUnindexed() && "Indexed store of vector?"); 4079 assert(OpNo == 1 && "Can only split the stored value"); 4080 SDLoc DL(N); 4081 4082 bool isTruncating = N->isTruncatingStore(); 4083 SDValue Ch = N->getChain(); 4084 SDValue Ptr = N->getBasePtr(); 4085 EVT MemoryVT = N->getMemoryVT(); 4086 Align Alignment = N->getOriginalAlign(); 4087 MachineMemOperand::Flags MMOFlags = N->getMemOperand()->getFlags(); 4088 AAMDNodes AAInfo = N->getAAInfo(); 4089 SDValue Lo, Hi; 4090 GetSplitVector(N->getOperand(1), Lo, Hi); 4091 4092 EVT LoMemVT, HiMemVT; 4093 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 4094 4095 // Scalarize if the split halves are not byte-sized. 4096 if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) 4097 return TLI.scalarizeVectorStore(N, DAG); 4098 4099 if (isTruncating) 4100 Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT, 4101 Alignment, MMOFlags, AAInfo); 4102 else 4103 Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), Alignment, MMOFlags, 4104 AAInfo); 4105 4106 MachinePointerInfo MPI; 4107 IncrementPointer(N, LoMemVT, MPI, Ptr); 4108 4109 if (isTruncating) 4110 Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI, 4111 HiMemVT, Alignment, MMOFlags, AAInfo); 4112 else 4113 Hi = DAG.getStore(Ch, DL, Hi, Ptr, MPI, Alignment, MMOFlags, AAInfo); 4114 4115 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 4116 } 4117 4118 SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { 4119 SDLoc DL(N); 4120 4121 // The input operands all must have the same type, and we know the result 4122 // type is valid. Convert this to a buildvector which extracts all the 4123 // input elements. 4124 // TODO: If the input elements are power-two vectors, we could convert this to 4125 // a new CONCAT_VECTORS node with elements that are half-wide. 4126 SmallVector<SDValue, 32> Elts; 4127 EVT EltVT = N->getValueType(0).getVectorElementType(); 4128 for (const SDValue &Op : N->op_values()) { 4129 for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); 4130 i != e; ++i) { 4131 Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Op, 4132 DAG.getVectorIdxConstant(i, DL))); 4133 } 4134 } 4135 4136 return DAG.getBuildVector(N->getValueType(0), DL, Elts); 4137 } 4138 4139 SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) { 4140 // The result type is legal, but the input type is illegal. 
  // If splitting ends up with the result type of each half still being legal,
  // just do that. If, however, that would result in an illegal result type,
  // we can try to get more clever with power-of-two vectors. Specifically,
  // split the input type, but also widen the result element size, then
  // concatenate the halves and truncate again. For example, consider a target
  // where v8i8 is legal and v8i32 is not (ARM, which doesn't have 256-bit
  // vectors). To perform a "%res = v8i8 trunc v8i32 %in" we do:
  // %inlo = v4i32 extract_subvector %in, 0
  // %inhi = v4i32 extract_subvector %in, 4
  // %lo16 = v4i16 trunc v4i32 %inlo
  // %hi16 = v4i16 trunc v4i32 %inhi
  // %in16 = v8i16 concat_vectors v4i16 %lo16, v4i16 %hi16
  // %res = v8i8 trunc v8i16 %in16
  //
  // Without this transform, the original truncate would end up being
  // scalarized, which is pretty much always a last resort.
  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
  SDValue InVec = N->getOperand(OpNo);
  EVT InVT = InVec->getValueType(0);
  EVT OutVT = N->getValueType(0);
  ElementCount NumElements = OutVT.getVectorElementCount();
  bool IsFloat = OutVT.isFloatingPoint();

  unsigned InElementSize = InVT.getScalarSizeInBits();
  unsigned OutElementSize = OutVT.getScalarSizeInBits();

  // Determine the split output VT. If it's legal, we can just split directly.
  EVT LoOutVT, HiOutVT;
  std::tie(LoOutVT, HiOutVT) = DAG.GetSplitDestVTs(OutVT);
  assert(LoOutVT == HiOutVT && "Unequal split?");

  // If the input elements are only 1/2 the width of the result elements,
  // just use the normal splitting. Our trick only works if there's room
  // to split more than once.
  if (isTypeLegal(LoOutVT) ||
      InElementSize <= OutElementSize * 2)
    return SplitVecOp_UnaryOp(N);
  SDLoc DL(N);

  // Don't touch if this will be scalarized.
  EVT FinalVT = InVT;
  while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
    FinalVT = FinalVT.getHalfNumVectorElementsVT(*DAG.getContext());

  if (getTypeAction(FinalVT) == TargetLowering::TypeScalarizeVector)
    return SplitVecOp_UnaryOp(N);

  // Get the split input vector.
  SDValue InLoVec, InHiVec;
  GetSplitVector(InVec, InLoVec, InHiVec);

  // Truncate them to 1/2 the element size.
  //
  // This assumes the number of elements is a power of two; any vector that
  // isn't should be widened, not split.
  EVT HalfElementVT = IsFloat ?
      EVT::getFloatingPointVT(InElementSize/2) :
      EVT::getIntegerVT(*DAG.getContext(), InElementSize/2);
  EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT,
                                NumElements.divideCoefficientBy(2));

  SDValue HalfLo;
  SDValue HalfHi;
  SDValue Chain;
  if (N->isStrictFPOpcode()) {
    HalfLo = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
                         {N->getOperand(0), InLoVec});
    HalfHi = DAG.getNode(N->getOpcode(), DL, {HalfVT, MVT::Other},
                         {N->getOperand(0), InHiVec});
    // Legalize the chain result - switch anything that used the old chain to
    // use the new one.
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, HalfLo.getValue(1),
                        HalfHi.getValue(1));
  } else {
    HalfLo = DAG.getNode(N->getOpcode(), DL, HalfVT, InLoVec);
    HalfHi = DAG.getNode(N->getOpcode(), DL, HalfVT, InHiVec);
  }

  // Concatenate them to get the full intermediate truncation result.
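  // Continuing the v8i32 -> v8i8 example above (illustrative): HalfLo and
  // HalfHi are the two v4i16 halves, InterVT is v8i16, and the final truncate
  // below produces the v8i8 result.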
4220 EVT InterVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, NumElements); 4221 SDValue InterVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InterVT, HalfLo, 4222 HalfHi); 4223 // Now finish up by truncating all the way down to the original result 4224 // type. This should normally be something that ends up being legal directly, 4225 // but in theory if a target has very wide vectors and an annoyingly 4226 // restricted set of legal types, this split can chain to build things up. 4227 4228 if (N->isStrictFPOpcode()) { 4229 SDValue Res = DAG.getNode( 4230 ISD::STRICT_FP_ROUND, DL, {OutVT, MVT::Other}, 4231 {Chain, InterVec, 4232 DAG.getTargetConstant(0, DL, TLI.getPointerTy(DAG.getDataLayout()))}); 4233 // Relink the chain 4234 ReplaceValueWith(SDValue(N, 1), SDValue(Res.getNode(), 1)); 4235 return Res; 4236 } 4237 4238 return IsFloat 4239 ? DAG.getNode(ISD::FP_ROUND, DL, OutVT, InterVec, 4240 DAG.getTargetConstant( 4241 0, DL, TLI.getPointerTy(DAG.getDataLayout()))) 4242 : DAG.getNode(ISD::TRUNCATE, DL, OutVT, InterVec); 4243 } 4244 4245 SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { 4246 unsigned Opc = N->getOpcode(); 4247 bool isStrict = Opc == ISD::STRICT_FSETCC || Opc == ISD::STRICT_FSETCCS; 4248 assert(N->getValueType(0).isVector() && 4249 N->getOperand(isStrict ? 1 : 0).getValueType().isVector() && 4250 "Operand types must be vectors"); 4251 // The result has a legal vector type, but the input needs splitting. 4252 SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; 4253 SDLoc DL(N); 4254 GetSplitVector(N->getOperand(isStrict ? 1 : 0), Lo0, Hi0); 4255 GetSplitVector(N->getOperand(isStrict ? 2 : 1), Lo1, Hi1); 4256 4257 auto PartEltCnt = Lo0.getValueType().getVectorElementCount(); 4258 4259 LLVMContext &Context = *DAG.getContext(); 4260 EVT PartResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt); 4261 EVT WideResVT = EVT::getVectorVT(Context, MVT::i1, PartEltCnt*2); 4262 4263 if (Opc == ISD::SETCC) { 4264 LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2)); 4265 HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2)); 4266 } else if (isStrict) { 4267 LoRes = DAG.getNode(Opc, DL, DAG.getVTList(PartResVT, N->getValueType(1)), 4268 N->getOperand(0), Lo0, Lo1, N->getOperand(3)); 4269 HiRes = DAG.getNode(Opc, DL, DAG.getVTList(PartResVT, N->getValueType(1)), 4270 N->getOperand(0), Hi0, Hi1, N->getOperand(3)); 4271 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 4272 LoRes.getValue(1), HiRes.getValue(1)); 4273 ReplaceValueWith(SDValue(N, 1), NewChain); 4274 } else { 4275 assert(Opc == ISD::VP_SETCC && "Expected VP_SETCC opcode"); 4276 SDValue MaskLo, MaskHi, EVLLo, EVLHi; 4277 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(3)); 4278 std::tie(EVLLo, EVLHi) = 4279 DAG.SplitEVL(N->getOperand(4), N->getValueType(0), DL); 4280 LoRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Lo0, Lo1, 4281 N->getOperand(2), MaskLo, EVLLo); 4282 HiRes = DAG.getNode(ISD::VP_SETCC, DL, PartResVT, Hi0, Hi1, 4283 N->getOperand(2), MaskHi, EVLHi); 4284 } 4285 SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes); 4286 4287 EVT OpVT = N->getOperand(0).getValueType(); 4288 ISD::NodeType ExtendCode = 4289 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); 4290 return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con); 4291 } 4292 4293 4294 SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { 4295 // The result has a legal vector type, but the input needs splitting. 
4296 EVT ResVT = N->getValueType(0); 4297 SDValue Lo, Hi; 4298 SDLoc DL(N); 4299 GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi); 4300 EVT InVT = Lo.getValueType(); 4301 4302 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(), 4303 InVT.getVectorElementCount()); 4304 4305 if (N->isStrictFPOpcode()) { 4306 Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, 4307 { N->getOperand(0), Lo, N->getOperand(2) }); 4308 Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other }, 4309 { N->getOperand(0), Hi, N->getOperand(2) }); 4310 // Legalize the chain result - switch anything that used the old chain to 4311 // use the new one. 4312 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 4313 Lo.getValue(1), Hi.getValue(1)); 4314 ReplaceValueWith(SDValue(N, 1), NewChain); 4315 } else if (N->getOpcode() == ISD::VP_FP_ROUND) { 4316 SDValue MaskLo, MaskHi, EVLLo, EVLHi; 4317 std::tie(MaskLo, MaskHi) = SplitMask(N->getOperand(1)); 4318 std::tie(EVLLo, EVLHi) = 4319 DAG.SplitEVL(N->getOperand(2), N->getValueType(0), DL); 4320 Lo = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Lo, MaskLo, EVLLo); 4321 Hi = DAG.getNode(ISD::VP_FP_ROUND, DL, OutVT, Hi, MaskHi, EVLHi); 4322 } else { 4323 Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1)); 4324 Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1)); 4325 } 4326 4327 return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi); 4328 } 4329 4330 // Split a vector type in an FP binary operation where the second operand has a 4331 // different type from the first. 4332 // 4333 // The result (and the first input) has a legal vector type, but the second 4334 // input needs splitting. 4335 SDValue DAGTypeLegalizer::SplitVecOp_FPOpDifferentTypes(SDNode *N) { 4336 SDLoc DL(N); 4337 4338 EVT LHSLoVT, LHSHiVT; 4339 std::tie(LHSLoVT, LHSHiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 4340 4341 if (!isTypeLegal(LHSLoVT) || !isTypeLegal(LHSHiVT)) 4342 return DAG.UnrollVectorOp(N, N->getValueType(0).getVectorNumElements()); 4343 4344 SDValue LHSLo, LHSHi; 4345 std::tie(LHSLo, LHSHi) = 4346 DAG.SplitVector(N->getOperand(0), DL, LHSLoVT, LHSHiVT); 4347 4348 SDValue RHSLo, RHSHi; 4349 std::tie(RHSLo, RHSHi) = DAG.SplitVector(N->getOperand(1), DL); 4350 4351 SDValue Lo = DAG.getNode(N->getOpcode(), DL, LHSLoVT, LHSLo, RHSLo); 4352 SDValue Hi = DAG.getNode(N->getOpcode(), DL, LHSHiVT, LHSHi, RHSHi); 4353 4354 return DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0), Lo, Hi); 4355 } 4356 4357 SDValue DAGTypeLegalizer::SplitVecOp_CMP(SDNode *N) { 4358 LLVMContext &Ctxt = *DAG.getContext(); 4359 SDLoc dl(N); 4360 4361 SDValue LHSLo, LHSHi, RHSLo, RHSHi; 4362 GetSplitVector(N->getOperand(0), LHSLo, LHSHi); 4363 GetSplitVector(N->getOperand(1), RHSLo, RHSHi); 4364 4365 EVT ResVT = N->getValueType(0); 4366 ElementCount SplitOpEC = LHSLo.getValueType().getVectorElementCount(); 4367 EVT NewResVT = 4368 EVT::getVectorVT(Ctxt, ResVT.getVectorElementType(), SplitOpEC); 4369 4370 SDValue Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSLo, RHSLo); 4371 SDValue Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, LHSHi, RHSHi); 4372 4373 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); 4374 } 4375 4376 SDValue DAGTypeLegalizer::SplitVecOp_FP_TO_XINT_SAT(SDNode *N) { 4377 EVT ResVT = N->getValueType(0); 4378 SDValue Lo, Hi; 4379 SDLoc dl(N); 4380 GetSplitVector(N->getOperand(0), Lo, Hi); 4381 EVT InVT = Lo.getValueType(); 4382 4383 EVT NewResVT = 4384 EVT::getVectorVT(*DAG.getContext(), 
ResVT.getVectorElementType(), 4385 InVT.getVectorElementCount()); 4386 4387 Lo = DAG.getNode(N->getOpcode(), dl, NewResVT, Lo, N->getOperand(1)); 4388 Hi = DAG.getNode(N->getOpcode(), dl, NewResVT, Hi, N->getOperand(1)); 4389 4390 return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); 4391 } 4392 4393 SDValue DAGTypeLegalizer::SplitVecOp_VP_CttzElements(SDNode *N) { 4394 SDLoc DL(N); 4395 EVT ResVT = N->getValueType(0); 4396 4397 SDValue Lo, Hi; 4398 SDValue VecOp = N->getOperand(0); 4399 GetSplitVector(VecOp, Lo, Hi); 4400 4401 auto [MaskLo, MaskHi] = SplitMask(N->getOperand(1)); 4402 auto [EVLLo, EVLHi] = 4403 DAG.SplitEVL(N->getOperand(2), VecOp.getValueType(), DL); 4404 SDValue VLo = DAG.getZExtOrTrunc(EVLLo, DL, ResVT); 4405 4406 // if VP_CTTZ_ELTS(Lo) != EVLLo => VP_CTTZ_ELTS(Lo). 4407 // else => EVLLo + (VP_CTTZ_ELTS(Hi) or VP_CTTZ_ELTS_ZERO_UNDEF(Hi)). 4408 SDValue ResLo = DAG.getNode(ISD::VP_CTTZ_ELTS, DL, ResVT, Lo, MaskLo, EVLLo); 4409 SDValue ResLoNotEVL = 4410 DAG.getSetCC(DL, getSetCCResultType(ResVT), ResLo, VLo, ISD::SETNE); 4411 SDValue ResHi = DAG.getNode(N->getOpcode(), DL, ResVT, Hi, MaskHi, EVLHi); 4412 return DAG.getSelect(DL, ResVT, ResLoNotEVL, ResLo, 4413 DAG.getNode(ISD::ADD, DL, ResVT, VLo, ResHi)); 4414 } 4415 4416 SDValue DAGTypeLegalizer::SplitVecOp_VECTOR_HISTOGRAM(SDNode *N) { 4417 MaskedHistogramSDNode *HG = cast<MaskedHistogramSDNode>(N); 4418 SDLoc DL(HG); 4419 SDValue Inc = HG->getInc(); 4420 SDValue Ptr = HG->getBasePtr(); 4421 SDValue Scale = HG->getScale(); 4422 SDValue IntID = HG->getIntID(); 4423 EVT MemVT = HG->getMemoryVT(); 4424 MachineMemOperand *MMO = HG->getMemOperand(); 4425 ISD::MemIndexType IndexType = HG->getIndexType(); 4426 4427 SDValue IndexLo, IndexHi, MaskLo, MaskHi; 4428 std::tie(IndexLo, IndexHi) = DAG.SplitVector(HG->getIndex(), DL); 4429 std::tie(MaskLo, MaskHi) = DAG.SplitVector(HG->getMask(), DL); 4430 SDValue OpsLo[] = {HG->getChain(), Inc, MaskLo, Ptr, IndexLo, Scale, IntID}; 4431 SDValue Lo = DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, 4432 OpsLo, MMO, IndexType); 4433 SDValue OpsHi[] = {Lo, Inc, MaskHi, Ptr, IndexHi, Scale, IntID}; 4434 return DAG.getMaskedHistogram(DAG.getVTList(MVT::Other), MemVT, DL, OpsHi, 4435 MMO, IndexType); 4436 } 4437 4438 //===----------------------------------------------------------------------===// 4439 // Result Vector Widening 4440 //===----------------------------------------------------------------------===// 4441 4442 void DAGTypeLegalizer::ReplaceOtherWidenResults(SDNode *N, SDNode *WidenNode, 4443 unsigned WidenResNo) { 4444 unsigned NumResults = N->getNumValues(); 4445 for (unsigned ResNo = 0; ResNo < NumResults; ResNo++) { 4446 if (ResNo == WidenResNo) 4447 continue; 4448 EVT ResVT = N->getValueType(ResNo); 4449 if (getTypeAction(ResVT) == TargetLowering::TypeWidenVector) { 4450 SetWidenedVector(SDValue(N, ResNo), SDValue(WidenNode, ResNo)); 4451 } else { 4452 SDLoc DL(N); 4453 SDValue ResVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, 4454 SDValue(WidenNode, ResNo), 4455 DAG.getVectorIdxConstant(0, DL)); 4456 ReplaceValueWith(SDValue(N, ResNo), ResVal); 4457 } 4458 } 4459 } 4460 4461 void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { 4462 LLVM_DEBUG(dbgs() << "Widen node result " << ResNo << ": "; N->dump(&DAG)); 4463 4464 // See if the target wants to custom widen this node. 
4465 if (CustomWidenLowerNode(N, N->getValueType(ResNo))) 4466 return; 4467 4468 SDValue Res = SDValue(); 4469 4470 auto unrollExpandedOp = [&]() { 4471 // We're going to widen this vector op to a legal type by padding with undef 4472 // elements. If the wide vector op is eventually going to be expanded to 4473 // scalar libcalls, then unroll into scalar ops now to avoid unnecessary 4474 // libcalls on the undef elements. 4475 EVT VT = N->getValueType(0); 4476 EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 4477 if (!TLI.isOperationLegalOrCustomOrPromote(N->getOpcode(), WideVecVT) && 4478 TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) { 4479 Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements()); 4480 if (N->getNumValues() > 1) 4481 ReplaceOtherWidenResults(N, Res.getNode(), ResNo); 4482 return true; 4483 } 4484 return false; 4485 }; 4486 4487 switch (N->getOpcode()) { 4488 default: 4489 #ifndef NDEBUG 4490 dbgs() << "WidenVectorResult #" << ResNo << ": "; 4491 N->dump(&DAG); 4492 dbgs() << "\n"; 4493 #endif 4494 report_fatal_error("Do not know how to widen the result of this operator!"); 4495 4496 case ISD::MERGE_VALUES: Res = WidenVecRes_MERGE_VALUES(N, ResNo); break; 4497 case ISD::ADDRSPACECAST: 4498 Res = WidenVecRes_ADDRSPACECAST(N); 4499 break; 4500 case ISD::AssertZext: Res = WidenVecRes_AssertZext(N); break; 4501 case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break; 4502 case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break; 4503 case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break; 4504 case ISD::INSERT_SUBVECTOR: 4505 Res = WidenVecRes_INSERT_SUBVECTOR(N); 4506 break; 4507 case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break; 4508 case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break; 4509 case ISD::LOAD: Res = WidenVecRes_LOAD(N); break; 4510 case ISD::STEP_VECTOR: 4511 case ISD::SPLAT_VECTOR: 4512 case ISD::SCALAR_TO_VECTOR: 4513 case ISD::EXPERIMENTAL_VP_SPLAT: 4514 Res = WidenVecRes_ScalarOp(N); 4515 break; 4516 case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break; 4517 case ISD::VSELECT: 4518 case ISD::SELECT: 4519 case ISD::VP_SELECT: 4520 case ISD::VP_MERGE: 4521 Res = WidenVecRes_Select(N); 4522 break; 4523 case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break; 4524 case ISD::VP_SETCC: 4525 case ISD::SETCC: Res = WidenVecRes_SETCC(N); break; 4526 case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break; 4527 case ISD::VECTOR_SHUFFLE: 4528 Res = WidenVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N)); 4529 break; 4530 case ISD::VP_LOAD: 4531 Res = WidenVecRes_VP_LOAD(cast<VPLoadSDNode>(N)); 4532 break; 4533 case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: 4534 Res = WidenVecRes_VP_STRIDED_LOAD(cast<VPStridedLoadSDNode>(N)); 4535 break; 4536 case ISD::VECTOR_COMPRESS: 4537 Res = WidenVecRes_VECTOR_COMPRESS(N); 4538 break; 4539 case ISD::MLOAD: 4540 Res = WidenVecRes_MLOAD(cast<MaskedLoadSDNode>(N)); 4541 break; 4542 case ISD::MGATHER: 4543 Res = WidenVecRes_MGATHER(cast<MaskedGatherSDNode>(N)); 4544 break; 4545 case ISD::VP_GATHER: 4546 Res = WidenVecRes_VP_GATHER(cast<VPGatherSDNode>(N)); 4547 break; 4548 case ISD::VECTOR_REVERSE: 4549 Res = WidenVecRes_VECTOR_REVERSE(N); 4550 break; 4551 4552 case ISD::ADD: case ISD::VP_ADD: 4553 case ISD::AND: case ISD::VP_AND: 4554 case ISD::MUL: case ISD::VP_MUL: 4555 case ISD::MULHS: 4556 case ISD::MULHU: 4557 case ISD::ABDS: 4558 case ISD::ABDU: 4559 case ISD::OR: case ISD::VP_OR: 4560 case ISD::SUB: case ISD::VP_SUB: 4561 
case ISD::XOR: case ISD::VP_XOR: 4562 case ISD::SHL: case ISD::VP_SHL: 4563 case ISD::SRA: case ISD::VP_SRA: 4564 case ISD::SRL: case ISD::VP_SRL: 4565 case ISD::FMINNUM: 4566 case ISD::FMINNUM_IEEE: 4567 case ISD::VP_FMINNUM: 4568 case ISD::FMAXNUM: 4569 case ISD::FMAXNUM_IEEE: 4570 case ISD::VP_FMAXNUM: 4571 case ISD::FMINIMUM: 4572 case ISD::VP_FMINIMUM: 4573 case ISD::FMAXIMUM: 4574 case ISD::VP_FMAXIMUM: 4575 case ISD::FMINIMUMNUM: 4576 case ISD::FMAXIMUMNUM: 4577 case ISD::SMIN: case ISD::VP_SMIN: 4578 case ISD::SMAX: case ISD::VP_SMAX: 4579 case ISD::UMIN: case ISD::VP_UMIN: 4580 case ISD::UMAX: case ISD::VP_UMAX: 4581 case ISD::UADDSAT: case ISD::VP_UADDSAT: 4582 case ISD::SADDSAT: case ISD::VP_SADDSAT: 4583 case ISD::USUBSAT: case ISD::VP_USUBSAT: 4584 case ISD::SSUBSAT: case ISD::VP_SSUBSAT: 4585 case ISD::SSHLSAT: 4586 case ISD::USHLSAT: 4587 case ISD::ROTL: 4588 case ISD::ROTR: 4589 case ISD::AVGFLOORS: 4590 case ISD::AVGFLOORU: 4591 case ISD::AVGCEILS: 4592 case ISD::AVGCEILU: 4593 // Vector-predicated binary op widening. Note that -- unlike the 4594 // unpredicated versions -- we don't have to worry about trapping on 4595 // operations like UDIV, FADD, etc., as we pass on the original vector 4596 // length parameter. This means the widened elements containing garbage 4597 // aren't active. 4598 case ISD::VP_SDIV: 4599 case ISD::VP_UDIV: 4600 case ISD::VP_SREM: 4601 case ISD::VP_UREM: 4602 case ISD::VP_FADD: 4603 case ISD::VP_FSUB: 4604 case ISD::VP_FMUL: 4605 case ISD::VP_FDIV: 4606 case ISD::VP_FREM: 4607 case ISD::VP_FCOPYSIGN: 4608 Res = WidenVecRes_Binary(N); 4609 break; 4610 4611 case ISD::SCMP: 4612 case ISD::UCMP: 4613 Res = WidenVecRes_CMP(N); 4614 break; 4615 4616 case ISD::FPOW: 4617 case ISD::FATAN2: 4618 case ISD::FREM: 4619 if (unrollExpandedOp()) 4620 break; 4621 // If the target has custom/legal support for the scalar FP intrinsic ops 4622 // (they are probably not destined to become libcalls), then widen those 4623 // like any other binary ops. 4624 [[fallthrough]]; 4625 4626 case ISD::FADD: 4627 case ISD::FMUL: 4628 case ISD::FSUB: 4629 case ISD::FDIV: 4630 case ISD::SDIV: 4631 case ISD::UDIV: 4632 case ISD::SREM: 4633 case ISD::UREM: 4634 Res = WidenVecRes_BinaryCanTrap(N); 4635 break; 4636 4637 case ISD::SMULFIX: 4638 case ISD::SMULFIXSAT: 4639 case ISD::UMULFIX: 4640 case ISD::UMULFIXSAT: 4641 // These are binary operations, but with an extra operand that shouldn't 4642 // be widened (the scale). 
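// For example (illustrative): a v3i32 SMULFIX by a constant scale of 15 is
// widened to a v4i32 SMULFIX; both vector operands are widened, while the
// scalar scale operand is passed through untouched.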
4643 Res = WidenVecRes_BinaryWithExtraScalarOp(N); 4644 break; 4645 4646 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ 4647 case ISD::STRICT_##DAGN: 4648 #include "llvm/IR/ConstrainedOps.def" 4649 Res = WidenVecRes_StrictFP(N); 4650 break; 4651 4652 case ISD::UADDO: 4653 case ISD::SADDO: 4654 case ISD::USUBO: 4655 case ISD::SSUBO: 4656 case ISD::UMULO: 4657 case ISD::SMULO: 4658 Res = WidenVecRes_OverflowOp(N, ResNo); 4659 break; 4660 4661 case ISD::FCOPYSIGN: 4662 Res = WidenVecRes_FCOPYSIGN(N); 4663 break; 4664 4665 case ISD::IS_FPCLASS: 4666 case ISD::FPTRUNC_ROUND: 4667 Res = WidenVecRes_UnarySameEltsWithScalarArg(N); 4668 break; 4669 4670 case ISD::FLDEXP: 4671 case ISD::FPOWI: 4672 if (!unrollExpandedOp()) 4673 Res = WidenVecRes_ExpOp(N); 4674 break; 4675 4676 case ISD::ANY_EXTEND_VECTOR_INREG: 4677 case ISD::SIGN_EXTEND_VECTOR_INREG: 4678 case ISD::ZERO_EXTEND_VECTOR_INREG: 4679 Res = WidenVecRes_EXTEND_VECTOR_INREG(N); 4680 break; 4681 4682 case ISD::ANY_EXTEND: 4683 case ISD::FP_EXTEND: 4684 case ISD::VP_FP_EXTEND: 4685 case ISD::FP_ROUND: 4686 case ISD::VP_FP_ROUND: 4687 case ISD::FP_TO_SINT: 4688 case ISD::VP_FP_TO_SINT: 4689 case ISD::FP_TO_UINT: 4690 case ISD::VP_FP_TO_UINT: 4691 case ISD::SIGN_EXTEND: 4692 case ISD::VP_SIGN_EXTEND: 4693 case ISD::SINT_TO_FP: 4694 case ISD::VP_SINT_TO_FP: 4695 case ISD::VP_TRUNCATE: 4696 case ISD::TRUNCATE: 4697 case ISD::UINT_TO_FP: 4698 case ISD::VP_UINT_TO_FP: 4699 case ISD::ZERO_EXTEND: 4700 case ISD::VP_ZERO_EXTEND: 4701 Res = WidenVecRes_Convert(N); 4702 break; 4703 4704 case ISD::FP_TO_SINT_SAT: 4705 case ISD::FP_TO_UINT_SAT: 4706 Res = WidenVecRes_FP_TO_XINT_SAT(N); 4707 break; 4708 4709 case ISD::LRINT: 4710 case ISD::LLRINT: 4711 case ISD::VP_LRINT: 4712 case ISD::VP_LLRINT: 4713 case ISD::LROUND: 4714 case ISD::LLROUND: 4715 Res = WidenVecRes_XROUND(N); 4716 break; 4717 4718 case ISD::FACOS: 4719 case ISD::FASIN: 4720 case ISD::FATAN: 4721 case ISD::FCEIL: 4722 case ISD::FCOS: 4723 case ISD::FCOSH: 4724 case ISD::FEXP: 4725 case ISD::FEXP2: 4726 case ISD::FEXP10: 4727 case ISD::FFLOOR: 4728 case ISD::FLOG: 4729 case ISD::FLOG10: 4730 case ISD::FLOG2: 4731 case ISD::FNEARBYINT: 4732 case ISD::FRINT: 4733 case ISD::FROUND: 4734 case ISD::FROUNDEVEN: 4735 case ISD::FSIN: 4736 case ISD::FSINH: 4737 case ISD::FSQRT: 4738 case ISD::FTAN: 4739 case ISD::FTANH: 4740 case ISD::FTRUNC: 4741 if (unrollExpandedOp()) 4742 break; 4743 // If the target has custom/legal support for the scalar FP intrinsic ops 4744 // (they are probably not destined to become libcalls), then widen those 4745 // like any other unary ops. 
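// For example (illustrative): a v3f32 FCEIL widened to v4f32 on a target
// that has no usable v4f32 FCEIL and expands the scalar form: the
// unrollExpandedOp() path above emits just the three needed scalar ceil
// operations (the padded lane stays undef) instead of four eventual
// libcalls. If scalar FCEIL is legal or custom, we fall through and emit a
// v4f32 FCEIL instead.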
4746 [[fallthrough]]; 4747 4748 case ISD::ABS: 4749 case ISD::VP_ABS: 4750 case ISD::BITREVERSE: 4751 case ISD::VP_BITREVERSE: 4752 case ISD::BSWAP: 4753 case ISD::VP_BSWAP: 4754 case ISD::CTLZ: 4755 case ISD::VP_CTLZ: 4756 case ISD::CTLZ_ZERO_UNDEF: 4757 case ISD::VP_CTLZ_ZERO_UNDEF: 4758 case ISD::CTPOP: 4759 case ISD::VP_CTPOP: 4760 case ISD::CTTZ: 4761 case ISD::VP_CTTZ: 4762 case ISD::CTTZ_ZERO_UNDEF: 4763 case ISD::VP_CTTZ_ZERO_UNDEF: 4764 case ISD::FNEG: case ISD::VP_FNEG: 4765 case ISD::FABS: case ISD::VP_FABS: 4766 case ISD::VP_SQRT: 4767 case ISD::VP_FCEIL: 4768 case ISD::VP_FFLOOR: 4769 case ISD::VP_FRINT: 4770 case ISD::VP_FNEARBYINT: 4771 case ISD::VP_FROUND: 4772 case ISD::VP_FROUNDEVEN: 4773 case ISD::VP_FROUNDTOZERO: 4774 case ISD::FREEZE: 4775 case ISD::ARITH_FENCE: 4776 case ISD::FCANONICALIZE: 4777 Res = WidenVecRes_Unary(N); 4778 break; 4779 case ISD::FMA: case ISD::VP_FMA: 4780 case ISD::FSHL: 4781 case ISD::VP_FSHL: 4782 case ISD::FSHR: 4783 case ISD::VP_FSHR: 4784 Res = WidenVecRes_Ternary(N); 4785 break; 4786 case ISD::FFREXP: 4787 case ISD::FSINCOS: { 4788 if (!unrollExpandedOp()) 4789 Res = WidenVecRes_UnaryOpWithTwoResults(N, ResNo); 4790 break; 4791 } 4792 } 4793 4794 // If Res is null, the sub-method took care of registering the result. 4795 if (Res.getNode()) 4796 SetWidenedVector(SDValue(N, ResNo), Res); 4797 } 4798 4799 SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { 4800 // Ternary op widening. 4801 SDLoc dl(N); 4802 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 4803 SDValue InOp1 = GetWidenedVector(N->getOperand(0)); 4804 SDValue InOp2 = GetWidenedVector(N->getOperand(1)); 4805 SDValue InOp3 = GetWidenedVector(N->getOperand(2)); 4806 if (N->getNumOperands() == 3) 4807 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3); 4808 4809 assert(N->getNumOperands() == 5 && "Unexpected number of operands!"); 4810 assert(N->isVPOpcode() && "Expected VP opcode"); 4811 4812 SDValue Mask = 4813 GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount()); 4814 return DAG.getNode(N->getOpcode(), dl, WidenVT, 4815 {InOp1, InOp2, InOp3, Mask, N->getOperand(4)}); 4816 } 4817 4818 SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { 4819 // Binary op widening. 
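// For example (illustrative): an illegal v3i32 ADD becomes a v4i32 ADD on
// the widened operands. For the VP forms the mask is widened as well and the
// original EVL operand is reused, so the padded lane stays inactive.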
4820 SDLoc dl(N); 4821 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 4822 SDValue InOp1 = GetWidenedVector(N->getOperand(0)); 4823 SDValue InOp2 = GetWidenedVector(N->getOperand(1)); 4824 if (N->getNumOperands() == 2) 4825 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, 4826 N->getFlags()); 4827 4828 assert(N->getNumOperands() == 4 && "Unexpected number of operands!"); 4829 assert(N->isVPOpcode() && "Expected VP opcode"); 4830 4831 SDValue Mask = 4832 GetWidenedMask(N->getOperand(2), WidenVT.getVectorElementCount()); 4833 return DAG.getNode(N->getOpcode(), dl, WidenVT, 4834 {InOp1, InOp2, Mask, N->getOperand(3)}, N->getFlags()); 4835 } 4836 4837 SDValue DAGTypeLegalizer::WidenVecRes_CMP(SDNode *N) { 4838 LLVMContext &Ctxt = *DAG.getContext(); 4839 SDLoc dl(N); 4840 4841 SDValue LHS = N->getOperand(0); 4842 SDValue RHS = N->getOperand(1); 4843 EVT OpVT = LHS.getValueType(); 4844 if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector) { 4845 LHS = GetWidenedVector(LHS); 4846 RHS = GetWidenedVector(RHS); 4847 OpVT = LHS.getValueType(); 4848 } 4849 4850 EVT WidenResVT = TLI.getTypeToTransformTo(Ctxt, N->getValueType(0)); 4851 ElementCount WidenResEC = WidenResVT.getVectorElementCount(); 4852 if (WidenResEC == OpVT.getVectorElementCount()) { 4853 return DAG.getNode(N->getOpcode(), dl, WidenResVT, LHS, RHS); 4854 } 4855 4856 return DAG.UnrollVectorOp(N, WidenResVT.getVectorNumElements()); 4857 } 4858 4859 SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) { 4860 // Binary op widening, but with an extra operand that shouldn't be widened. 4861 SDLoc dl(N); 4862 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 4863 SDValue InOp1 = GetWidenedVector(N->getOperand(0)); 4864 SDValue InOp2 = GetWidenedVector(N->getOperand(1)); 4865 SDValue InOp3 = N->getOperand(2); 4866 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3, 4867 N->getFlags()); 4868 } 4869 4870 // Given a vector of operations that have been broken up to widen, see 4871 // if we can collect them together into the next widest legal VT. This 4872 // implementation is trap-safe. 4873 static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI, 4874 SmallVectorImpl<SDValue> &ConcatOps, 4875 unsigned ConcatEnd, EVT VT, EVT MaxVT, 4876 EVT WidenVT) { 4877 // Check to see if we have a single operation with the widen type. 4878 if (ConcatEnd == 1) { 4879 VT = ConcatOps[0].getValueType(); 4880 if (VT == WidenVT) 4881 return ConcatOps[0]; 4882 } 4883 4884 SDLoc dl(ConcatOps[0]); 4885 EVT WidenEltVT = WidenVT.getVectorElementType(); 4886 4887 // while (Some element of ConcatOps is not of type MaxVT) { 4888 // From the end of ConcatOps, collect elements of the same type and put 4889 // them into an op of the next larger supported type 4890 // } 4891 while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { 4892 int Idx = ConcatEnd - 1; 4893 VT = ConcatOps[Idx--].getValueType(); 4894 while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) 4895 Idx--; 4896 4897 int NextSize = VT.isVector() ? 
VT.getVectorNumElements() : 1; 4898 EVT NextVT; 4899 do { 4900 NextSize *= 2; 4901 NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); 4902 } while (!TLI.isTypeLegal(NextVT)); 4903 4904 if (!VT.isVector()) { 4905 // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT 4906 SDValue VecOp = DAG.getUNDEF(NextVT); 4907 unsigned NumToInsert = ConcatEnd - Idx - 1; 4908 for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { 4909 VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, 4910 ConcatOps[OpIdx], DAG.getVectorIdxConstant(i, dl)); 4911 } 4912 ConcatOps[Idx+1] = VecOp; 4913 ConcatEnd = Idx + 2; 4914 } else { 4915 // Vector type, create a CONCAT_VECTORS of type NextVT 4916 SDValue undefVec = DAG.getUNDEF(VT); 4917 unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); 4918 SmallVector<SDValue, 16> SubConcatOps(OpsToConcat); 4919 unsigned RealVals = ConcatEnd - Idx - 1; 4920 unsigned SubConcatEnd = 0; 4921 unsigned SubConcatIdx = Idx + 1; 4922 while (SubConcatEnd < RealVals) 4923 SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; 4924 while (SubConcatEnd < OpsToConcat) 4925 SubConcatOps[SubConcatEnd++] = undefVec; 4926 ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, 4927 NextVT, SubConcatOps); 4928 ConcatEnd = SubConcatIdx + 1; 4929 } 4930 } 4931 4932 // Check to see if we have a single operation with the widen type. 4933 if (ConcatEnd == 1) { 4934 VT = ConcatOps[0].getValueType(); 4935 if (VT == WidenVT) 4936 return ConcatOps[0]; 4937 } 4938 4939 // add undefs of size MaxVT until ConcatOps grows to length of WidenVT 4940 unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); 4941 if (NumOps != ConcatEnd ) { 4942 SDValue UndefVal = DAG.getUNDEF(MaxVT); 4943 for (unsigned j = ConcatEnd; j < NumOps; ++j) 4944 ConcatOps[j] = UndefVal; 4945 } 4946 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, 4947 ArrayRef(ConcatOps.data(), NumOps)); 4948 } 4949 4950 SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { 4951 // Binary op widening for operations that can trap. 4952 unsigned Opcode = N->getOpcode(); 4953 SDLoc dl(N); 4954 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 4955 EVT WidenEltVT = WidenVT.getVectorElementType(); 4956 EVT VT = WidenVT; 4957 unsigned NumElts = VT.getVectorMinNumElements(); 4958 const SDNodeFlags Flags = N->getFlags(); 4959 while (!TLI.isTypeLegal(VT) && NumElts != 1) { 4960 NumElts = NumElts / 2; 4961 VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); 4962 } 4963 4964 if (NumElts != 1 && !TLI.canOpTrap(N->getOpcode(), VT)) { 4965 // Operation doesn't trap so just widen as normal. 4966 SDValue InOp1 = GetWidenedVector(N->getOperand(0)); 4967 SDValue InOp2 = GetWidenedVector(N->getOperand(1)); 4968 return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, Flags); 4969 } 4970 4971 // Generate a vp.op if it is custom/legal for the target. This avoids need 4972 // to split and tile the subvectors (below), because the inactive lanes can 4973 // simply be disabled. To avoid possible recursion, only do this if the 4974 // widened mask type is legal. 
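// For instance (illustrative): a v3i32 SDIV widened to v4i32 on a target
// where VP_SDIV is legal or custom becomes a VP_SDIV with an all-ones v4i1
// mask and EVL = 3, so the padded lane can never fault on a garbage divisor.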
4975 if (auto VPOpcode = ISD::getVPForBaseOpcode(Opcode); 4976 VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WidenVT)) { 4977 if (EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 4978 WidenVT.getVectorElementCount()); 4979 TLI.isTypeLegal(WideMaskVT)) { 4980 SDValue InOp1 = GetWidenedVector(N->getOperand(0)); 4981 SDValue InOp2 = GetWidenedVector(N->getOperand(1)); 4982 SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT); 4983 SDValue EVL = 4984 DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(), 4985 N->getValueType(0).getVectorElementCount()); 4986 return DAG.getNode(*VPOpcode, dl, WidenVT, InOp1, InOp2, Mask, EVL, 4987 Flags); 4988 } 4989 } 4990 4991 // FIXME: Improve support for scalable vectors. 4992 assert(!VT.isScalableVector() && "Scalable vectors not handled yet."); 4993 4994 // No legal vector version so unroll the vector operation and then widen. 4995 if (NumElts == 1) 4996 return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); 4997 4998 // Since the operation can trap, apply operation on the original vector. 4999 EVT MaxVT = VT; 5000 SDValue InOp1 = GetWidenedVector(N->getOperand(0)); 5001 SDValue InOp2 = GetWidenedVector(N->getOperand(1)); 5002 unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); 5003 5004 SmallVector<SDValue, 16> ConcatOps(CurNumElts); 5005 unsigned ConcatEnd = 0; // Current ConcatOps index. 5006 int Idx = 0; // Current Idx into input vectors. 5007 5008 // NumElts := greatest legal vector size (at most WidenVT) 5009 // while (orig. vector has unhandled elements) { 5010 // take munches of size NumElts from the beginning and add to ConcatOps 5011 // NumElts := next smaller supported vector size or 1 5012 // } 5013 while (CurNumElts != 0) { 5014 while (CurNumElts >= NumElts) { 5015 SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, 5016 DAG.getVectorIdxConstant(Idx, dl)); 5017 SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, 5018 DAG.getVectorIdxConstant(Idx, dl)); 5019 ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2, Flags); 5020 Idx += NumElts; 5021 CurNumElts -= NumElts; 5022 } 5023 do { 5024 NumElts = NumElts / 2; 5025 VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); 5026 } while (!TLI.isTypeLegal(VT) && NumElts != 1); 5027 5028 if (NumElts == 1) { 5029 for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { 5030 SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, 5031 InOp1, DAG.getVectorIdxConstant(Idx, dl)); 5032 SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, 5033 InOp2, DAG.getVectorIdxConstant(Idx, dl)); 5034 ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, 5035 EOp1, EOp2, Flags); 5036 } 5037 CurNumElts = 0; 5038 } 5039 } 5040 5041 return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT); 5042 } 5043 5044 SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) { 5045 switch (N->getOpcode()) { 5046 case ISD::STRICT_FSETCC: 5047 case ISD::STRICT_FSETCCS: 5048 return WidenVecRes_STRICT_FSETCC(N); 5049 case ISD::STRICT_FP_EXTEND: 5050 case ISD::STRICT_FP_ROUND: 5051 case ISD::STRICT_FP_TO_SINT: 5052 case ISD::STRICT_FP_TO_UINT: 5053 case ISD::STRICT_SINT_TO_FP: 5054 case ISD::STRICT_UINT_TO_FP: 5055 return WidenVecRes_Convert_StrictFP(N); 5056 default: 5057 break; 5058 } 5059 5060 // StrictFP op widening for operations that can trap. 
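// For example (illustrative): a STRICT_FADD on v3f32 is emitted in
// legal-sized chunks covering only the original three elements, and the
// per-chunk chains are joined back into one chain afterwards (a TokenFactor
// when there is more than one chunk).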
5061 unsigned NumOpers = N->getNumOperands(); 5062 unsigned Opcode = N->getOpcode(); 5063 SDLoc dl(N); 5064 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5065 EVT WidenEltVT = WidenVT.getVectorElementType(); 5066 EVT VT = WidenVT; 5067 unsigned NumElts = VT.getVectorNumElements(); 5068 while (!TLI.isTypeLegal(VT) && NumElts != 1) { 5069 NumElts = NumElts / 2; 5070 VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); 5071 } 5072 5073 // No legal vector version so unroll the vector operation and then widen. 5074 if (NumElts == 1) 5075 return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements()); 5076 5077 // Since the operation can trap, apply operation on the original vector. 5078 EVT MaxVT = VT; 5079 SmallVector<SDValue, 4> InOps; 5080 unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); 5081 5082 SmallVector<SDValue, 16> ConcatOps(CurNumElts); 5083 SmallVector<SDValue, 16> Chains; 5084 unsigned ConcatEnd = 0; // Current ConcatOps index. 5085 int Idx = 0; // Current Idx into input vectors. 5086 5087 // The Chain is the first operand. 5088 InOps.push_back(N->getOperand(0)); 5089 5090 // Now process the remaining operands. 5091 for (unsigned i = 1; i < NumOpers; ++i) { 5092 SDValue Oper = N->getOperand(i); 5093 5094 EVT OpVT = Oper.getValueType(); 5095 if (OpVT.isVector()) { 5096 if (getTypeAction(OpVT) == TargetLowering::TypeWidenVector) 5097 Oper = GetWidenedVector(Oper); 5098 else { 5099 EVT WideOpVT = 5100 EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(), 5101 WidenVT.getVectorElementCount()); 5102 Oper = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, 5103 DAG.getUNDEF(WideOpVT), Oper, 5104 DAG.getVectorIdxConstant(0, dl)); 5105 } 5106 } 5107 5108 InOps.push_back(Oper); 5109 } 5110 5111 // NumElts := greatest legal vector size (at most WidenVT) 5112 // while (orig. 
vector has unhandled elements) { 5113 // take munches of size NumElts from the beginning and add to ConcatOps 5114 // NumElts := next smaller supported vector size or 1 5115 // } 5116 while (CurNumElts != 0) { 5117 while (CurNumElts >= NumElts) { 5118 SmallVector<SDValue, 4> EOps; 5119 5120 for (unsigned i = 0; i < NumOpers; ++i) { 5121 SDValue Op = InOps[i]; 5122 5123 EVT OpVT = Op.getValueType(); 5124 if (OpVT.isVector()) { 5125 EVT OpExtractVT = 5126 EVT::getVectorVT(*DAG.getContext(), OpVT.getVectorElementType(), 5127 VT.getVectorElementCount()); 5128 Op = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpExtractVT, Op, 5129 DAG.getVectorIdxConstant(Idx, dl)); 5130 } 5131 5132 EOps.push_back(Op); 5133 } 5134 5135 EVT OperVT[] = {VT, MVT::Other}; 5136 SDValue Oper = DAG.getNode(Opcode, dl, OperVT, EOps); 5137 ConcatOps[ConcatEnd++] = Oper; 5138 Chains.push_back(Oper.getValue(1)); 5139 Idx += NumElts; 5140 CurNumElts -= NumElts; 5141 } 5142 do { 5143 NumElts = NumElts / 2; 5144 VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); 5145 } while (!TLI.isTypeLegal(VT) && NumElts != 1); 5146 5147 if (NumElts == 1) { 5148 for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { 5149 SmallVector<SDValue, 4> EOps; 5150 5151 for (unsigned i = 0; i < NumOpers; ++i) { 5152 SDValue Op = InOps[i]; 5153 5154 EVT OpVT = Op.getValueType(); 5155 if (OpVT.isVector()) 5156 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, 5157 OpVT.getVectorElementType(), Op, 5158 DAG.getVectorIdxConstant(Idx, dl)); 5159 5160 EOps.push_back(Op); 5161 } 5162 5163 EVT WidenVT[] = {WidenEltVT, MVT::Other}; 5164 SDValue Oper = DAG.getNode(Opcode, dl, WidenVT, EOps); 5165 ConcatOps[ConcatEnd++] = Oper; 5166 Chains.push_back(Oper.getValue(1)); 5167 } 5168 CurNumElts = 0; 5169 } 5170 } 5171 5172 // Build a factor node to remember all the Ops that have been created. 5173 SDValue NewChain; 5174 if (Chains.size() == 1) 5175 NewChain = Chains[0]; 5176 else 5177 NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); 5178 ReplaceValueWith(SDValue(N, 1), NewChain); 5179 5180 return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT); 5181 } 5182 5183 SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) { 5184 SDLoc DL(N); 5185 EVT ResVT = N->getValueType(0); 5186 EVT OvVT = N->getValueType(1); 5187 EVT WideResVT, WideOvVT; 5188 SDValue WideLHS, WideRHS; 5189 5190 // TODO: This might result in a widen/split loop. 5191 if (ResNo == 0) { 5192 WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT); 5193 WideOvVT = EVT::getVectorVT( 5194 *DAG.getContext(), OvVT.getVectorElementType(), 5195 WideResVT.getVectorNumElements()); 5196 5197 WideLHS = GetWidenedVector(N->getOperand(0)); 5198 WideRHS = GetWidenedVector(N->getOperand(1)); 5199 } else { 5200 WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT); 5201 WideResVT = EVT::getVectorVT( 5202 *DAG.getContext(), ResVT.getVectorElementType(), 5203 WideOvVT.getVectorNumElements()); 5204 5205 SDValue Zero = DAG.getVectorIdxConstant(0, DL); 5206 WideLHS = DAG.getNode( 5207 ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT), 5208 N->getOperand(0), Zero); 5209 WideRHS = DAG.getNode( 5210 ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT), 5211 N->getOperand(1), Zero); 5212 } 5213 5214 SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT); 5215 SDNode *WideNode = DAG.getNode( 5216 N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode(); 5217 5218 // Replace the other vector result not being explicitly widened here. 
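// For example (illustrative): for a v3i32 UADDO widened via ResNo 0, the sum
// becomes v4i32 and the v3i1 overflow result is either recorded as the
// widened v4i1 value or extracted back out as a v3i1 subvector, depending on
// its own type action.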
5219 unsigned OtherNo = 1 - ResNo; 5220 EVT OtherVT = N->getValueType(OtherNo); 5221 if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) { 5222 SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo)); 5223 } else { 5224 SDValue Zero = DAG.getVectorIdxConstant(0, DL); 5225 SDValue OtherVal = DAG.getNode( 5226 ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero); 5227 ReplaceValueWith(SDValue(N, OtherNo), OtherVal); 5228 } 5229 5230 return SDValue(WideNode, ResNo); 5231 } 5232 5233 SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { 5234 LLVMContext &Ctx = *DAG.getContext(); 5235 SDValue InOp = N->getOperand(0); 5236 SDLoc DL(N); 5237 5238 EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0)); 5239 ElementCount WidenEC = WidenVT.getVectorElementCount(); 5240 5241 EVT InVT = InOp.getValueType(); 5242 5243 unsigned Opcode = N->getOpcode(); 5244 const SDNodeFlags Flags = N->getFlags(); 5245 5246 // Handle the case of ZERO_EXTEND where the promoted InVT element size does 5247 // not equal that of WidenVT. 5248 if (N->getOpcode() == ISD::ZERO_EXTEND && 5249 getTypeAction(InVT) == TargetLowering::TypePromoteInteger && 5250 TLI.getTypeToTransformTo(Ctx, InVT).getScalarSizeInBits() != 5251 WidenVT.getScalarSizeInBits()) { 5252 InOp = ZExtPromotedInteger(InOp); 5253 InVT = InOp.getValueType(); 5254 if (WidenVT.getScalarSizeInBits() < InVT.getScalarSizeInBits()) 5255 Opcode = ISD::TRUNCATE; 5256 } 5257 5258 EVT InEltVT = InVT.getVectorElementType(); 5259 EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC); 5260 ElementCount InVTEC = InVT.getVectorElementCount(); 5261 5262 if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { 5263 InOp = GetWidenedVector(N->getOperand(0)); 5264 InVT = InOp.getValueType(); 5265 InVTEC = InVT.getVectorElementCount(); 5266 if (InVTEC == WidenEC) { 5267 if (N->getNumOperands() == 1) 5268 return DAG.getNode(Opcode, DL, WidenVT, InOp, Flags); 5269 if (N->getNumOperands() == 3) { 5270 assert(N->isVPOpcode() && "Expected VP opcode"); 5271 SDValue Mask = 5272 GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); 5273 return DAG.getNode(Opcode, DL, WidenVT, InOp, Mask, N->getOperand(2)); 5274 } 5275 return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags); 5276 } 5277 if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) { 5278 // If both input and result vector types are of same width, extend 5279 // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which 5280 // accepts fewer elements in the result than in the input. 5281 if (Opcode == ISD::ANY_EXTEND) 5282 return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, WidenVT, InOp); 5283 if (Opcode == ISD::SIGN_EXTEND) 5284 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, WidenVT, InOp); 5285 if (Opcode == ISD::ZERO_EXTEND) 5286 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, WidenVT, InOp); 5287 } 5288 } 5289 5290 if (TLI.isTypeLegal(InWidenVT)) { 5291 // Because the result and the input are different vector types, widening 5292 // the result could create a legal type but widening the input might make 5293 // it an illegal type that might lead to repeatedly splitting the input 5294 // and then widening it. To avoid this, we widen the input only if 5295 // it results in a legal type. 5296 if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) { 5297 // Widen the input and call convert on the widened input vector. 
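// For example (illustrative): converting a legal v2i32 input to an illegal
// v2f64 result that widens to v4f64: the v2i32 input is concatenated with an
// undef v2i32 to form v4i32, and a single v4i32 -> v4f64 SINT_TO_FP is
// emitted.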
5298 unsigned NumConcat = 5299 WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue(); 5300 SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); 5301 Ops[0] = InOp; 5302 SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops); 5303 if (N->getNumOperands() == 1) 5304 return DAG.getNode(Opcode, DL, WidenVT, InVec, Flags); 5305 return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags); 5306 } 5307 5308 if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) { 5309 SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp, 5310 DAG.getVectorIdxConstant(0, DL)); 5311 // Extract the input and convert the shorten input vector. 5312 if (N->getNumOperands() == 1) 5313 return DAG.getNode(Opcode, DL, WidenVT, InVal, Flags); 5314 return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1), Flags); 5315 } 5316 } 5317 5318 // Otherwise unroll into some nasty scalar code and rebuild the vector. 5319 EVT EltVT = WidenVT.getVectorElementType(); 5320 SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT)); 5321 // Use the original element count so we don't do more scalar opts than 5322 // necessary. 5323 unsigned MinElts = N->getValueType(0).getVectorNumElements(); 5324 for (unsigned i=0; i < MinElts; ++i) { 5325 SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, 5326 DAG.getVectorIdxConstant(i, DL)); 5327 if (N->getNumOperands() == 1) 5328 Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, Flags); 5329 else 5330 Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1), Flags); 5331 } 5332 5333 return DAG.getBuildVector(WidenVT, DL, Ops); 5334 } 5335 5336 SDValue DAGTypeLegalizer::WidenVecRes_FP_TO_XINT_SAT(SDNode *N) { 5337 SDLoc dl(N); 5338 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5339 ElementCount WidenNumElts = WidenVT.getVectorElementCount(); 5340 5341 SDValue Src = N->getOperand(0); 5342 EVT SrcVT = Src.getValueType(); 5343 5344 // Also widen the input. 5345 if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) { 5346 Src = GetWidenedVector(Src); 5347 SrcVT = Src.getValueType(); 5348 } 5349 5350 // Input and output not widened to the same size, give up. 5351 if (WidenNumElts != SrcVT.getVectorElementCount()) 5352 return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue()); 5353 5354 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, N->getOperand(1)); 5355 } 5356 5357 SDValue DAGTypeLegalizer::WidenVecRes_XROUND(SDNode *N) { 5358 SDLoc dl(N); 5359 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5360 ElementCount WidenNumElts = WidenVT.getVectorElementCount(); 5361 5362 SDValue Src = N->getOperand(0); 5363 EVT SrcVT = Src.getValueType(); 5364 5365 // Also widen the input. 5366 if (getTypeAction(SrcVT) == TargetLowering::TypeWidenVector) { 5367 Src = GetWidenedVector(Src); 5368 SrcVT = Src.getValueType(); 5369 } 5370 5371 // Input and output not widened to the same size, give up. 
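// For example (illustrative): a v3f32 LRINT producing v3i64, where both
// sides widen to four lanes, becomes one v4f32 -> v4i64 LRINT; any other
// combination of element counts falls back to unrolling.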
5372 if (WidenNumElts != SrcVT.getVectorElementCount()) 5373 return DAG.UnrollVectorOp(N, WidenNumElts.getKnownMinValue()); 5374 5375 if (N->getNumOperands() == 1) 5376 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src); 5377 5378 assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); 5379 assert(N->isVPOpcode() && "Expected VP opcode"); 5380 5381 SDValue Mask = 5382 GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); 5383 return DAG.getNode(N->getOpcode(), dl, WidenVT, Src, Mask, N->getOperand(2)); 5384 } 5385 5386 SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) { 5387 SDValue InOp = N->getOperand(1); 5388 SDLoc DL(N); 5389 SmallVector<SDValue, 4> NewOps(N->ops()); 5390 5391 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5392 unsigned WidenNumElts = WidenVT.getVectorNumElements(); 5393 5394 EVT InVT = InOp.getValueType(); 5395 EVT InEltVT = InVT.getVectorElementType(); 5396 5397 unsigned Opcode = N->getOpcode(); 5398 5399 // FIXME: Optimizations need to be implemented here. 5400 5401 // Otherwise unroll into some nasty scalar code and rebuild the vector. 5402 EVT EltVT = WidenVT.getVectorElementType(); 5403 std::array<EVT, 2> EltVTs = {{EltVT, MVT::Other}}; 5404 SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); 5405 SmallVector<SDValue, 32> OpChains; 5406 // Use the original element count so we don't do more scalar opts than 5407 // necessary. 5408 unsigned MinElts = N->getValueType(0).getVectorNumElements(); 5409 for (unsigned i=0; i < MinElts; ++i) { 5410 NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, 5411 DAG.getVectorIdxConstant(i, DL)); 5412 Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps); 5413 OpChains.push_back(Ops[i].getValue(1)); 5414 } 5415 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains); 5416 ReplaceValueWith(SDValue(N, 1), NewChain); 5417 5418 return DAG.getBuildVector(WidenVT, DL, Ops); 5419 } 5420 5421 SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) { 5422 unsigned Opcode = N->getOpcode(); 5423 SDValue InOp = N->getOperand(0); 5424 SDLoc DL(N); 5425 5426 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5427 EVT WidenSVT = WidenVT.getVectorElementType(); 5428 unsigned WidenNumElts = WidenVT.getVectorNumElements(); 5429 5430 EVT InVT = InOp.getValueType(); 5431 EVT InSVT = InVT.getVectorElementType(); 5432 unsigned InVTNumElts = InVT.getVectorNumElements(); 5433 5434 if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { 5435 InOp = GetWidenedVector(InOp); 5436 InVT = InOp.getValueType(); 5437 if (InVT.getSizeInBits() == WidenVT.getSizeInBits()) { 5438 switch (Opcode) { 5439 case ISD::ANY_EXTEND_VECTOR_INREG: 5440 case ISD::SIGN_EXTEND_VECTOR_INREG: 5441 case ISD::ZERO_EXTEND_VECTOR_INREG: 5442 return DAG.getNode(Opcode, DL, WidenVT, InOp); 5443 } 5444 } 5445 } 5446 5447 // Unroll, extend the scalars and rebuild the vector. 
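// For example (illustrative): a v2i64 ZERO_EXTEND_VECTOR_INREG of a v4i32
// source whose result widens to v4i64: each of the four i32 lanes is
// extracted, zero-extended to i64, and placed in a v4i64 BUILD_VECTOR.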
5448 SmallVector<SDValue, 16> Ops; 5449 for (unsigned i = 0, e = std::min(InVTNumElts, WidenNumElts); i != e; ++i) { 5450 SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InSVT, InOp, 5451 DAG.getVectorIdxConstant(i, DL)); 5452 switch (Opcode) { 5453 case ISD::ANY_EXTEND_VECTOR_INREG: 5454 Val = DAG.getNode(ISD::ANY_EXTEND, DL, WidenSVT, Val); 5455 break; 5456 case ISD::SIGN_EXTEND_VECTOR_INREG: 5457 Val = DAG.getNode(ISD::SIGN_EXTEND, DL, WidenSVT, Val); 5458 break; 5459 case ISD::ZERO_EXTEND_VECTOR_INREG: 5460 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, WidenSVT, Val); 5461 break; 5462 default: 5463 llvm_unreachable("A *_EXTEND_VECTOR_INREG node was expected"); 5464 } 5465 Ops.push_back(Val); 5466 } 5467 5468 while (Ops.size() != WidenNumElts) 5469 Ops.push_back(DAG.getUNDEF(WidenSVT)); 5470 5471 return DAG.getBuildVector(WidenVT, DL, Ops); 5472 } 5473 5474 SDValue DAGTypeLegalizer::WidenVecRes_FCOPYSIGN(SDNode *N) { 5475 // If this is an FCOPYSIGN with same input types, we can treat it as a 5476 // normal (can trap) binary op. 5477 if (N->getOperand(0).getValueType() == N->getOperand(1).getValueType()) 5478 return WidenVecRes_BinaryCanTrap(N); 5479 5480 // If the types are different, fall back to unrolling. 5481 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5482 return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); 5483 } 5484 5485 /// Result and first source operand are different scalar types, but must have 5486 /// the same number of elements. There is an additional control argument which 5487 /// should be passed through unchanged. 5488 SDValue DAGTypeLegalizer::WidenVecRes_UnarySameEltsWithScalarArg(SDNode *N) { 5489 SDValue FpValue = N->getOperand(0); 5490 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5491 if (getTypeAction(FpValue.getValueType()) != TargetLowering::TypeWidenVector) 5492 return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); 5493 SDValue Arg = GetWidenedVector(FpValue); 5494 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, {Arg, N->getOperand(1)}, 5495 N->getFlags()); 5496 } 5497 5498 SDValue DAGTypeLegalizer::WidenVecRes_ExpOp(SDNode *N) { 5499 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5500 SDValue InOp = GetWidenedVector(N->getOperand(0)); 5501 SDValue RHS = N->getOperand(1); 5502 EVT ExpVT = RHS.getValueType(); 5503 SDValue ExpOp = RHS; 5504 if (ExpVT.isVector()) { 5505 EVT WideExpVT = 5506 WidenVT.changeVectorElementType(ExpVT.getVectorElementType()); 5507 ExpOp = ModifyToType(RHS, WideExpVT); 5508 } 5509 5510 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ExpOp); 5511 } 5512 5513 SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { 5514 // Unary op widening. 
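// For example (illustrative): an illegal v3f32 FNEG simply becomes a v4f32
// FNEG on the widened operand. For VP opcodes the mask is widened and the
// original EVL operand is reused.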
5515 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5516 SDValue InOp = GetWidenedVector(N->getOperand(0)); 5517 if (N->getNumOperands() == 1) 5518 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, N->getFlags()); 5519 5520 assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); 5521 assert(N->isVPOpcode() && "Expected VP opcode"); 5522 5523 SDValue Mask = 5524 GetWidenedMask(N->getOperand(1), WidenVT.getVectorElementCount()); 5525 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, 5526 {InOp, Mask, N->getOperand(2)}); 5527 } 5528 5529 SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { 5530 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5531 EVT ExtVT = EVT::getVectorVT(*DAG.getContext(), 5532 cast<VTSDNode>(N->getOperand(1))->getVT() 5533 .getVectorElementType(), 5534 WidenVT.getVectorNumElements()); 5535 SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); 5536 return DAG.getNode(N->getOpcode(), SDLoc(N), 5537 WidenVT, WidenLHS, DAG.getValueType(ExtVT)); 5538 } 5539 5540 SDValue DAGTypeLegalizer::WidenVecRes_UnaryOpWithTwoResults(SDNode *N, 5541 unsigned ResNo) { 5542 EVT VT0 = N->getValueType(0); 5543 EVT VT1 = N->getValueType(1); 5544 5545 assert(VT0.isVector() && VT1.isVector() && 5546 VT0.getVectorElementCount() == VT1.getVectorElementCount() && 5547 "expected both results to be vectors of matching element count"); 5548 5549 LLVMContext &Ctx = *DAG.getContext(); 5550 SDValue InOp = GetWidenedVector(N->getOperand(0)); 5551 5552 EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(ResNo)); 5553 ElementCount WidenEC = WidenVT.getVectorElementCount(); 5554 5555 EVT WidenVT0 = EVT::getVectorVT(Ctx, VT0.getVectorElementType(), WidenEC); 5556 EVT WidenVT1 = EVT::getVectorVT(Ctx, VT1.getVectorElementType(), WidenEC); 5557 5558 SDNode *WidenNode = 5559 DAG.getNode(N->getOpcode(), SDLoc(N), {WidenVT0, WidenVT1}, InOp) 5560 .getNode(); 5561 5562 ReplaceOtherWidenResults(N, WidenNode, ResNo); 5563 return SDValue(WidenNode, ResNo); 5564 } 5565 5566 SDValue DAGTypeLegalizer::WidenVecRes_MERGE_VALUES(SDNode *N, unsigned ResNo) { 5567 SDValue WidenVec = DisintegrateMERGE_VALUES(N, ResNo); 5568 return GetWidenedVector(WidenVec); 5569 } 5570 5571 SDValue DAGTypeLegalizer::WidenVecRes_ADDRSPACECAST(SDNode *N) { 5572 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5573 SDValue InOp = GetWidenedVector(N->getOperand(0)); 5574 auto *AddrSpaceCastN = cast<AddrSpaceCastSDNode>(N); 5575 5576 return DAG.getAddrSpaceCast(SDLoc(N), WidenVT, InOp, 5577 AddrSpaceCastN->getSrcAddressSpace(), 5578 AddrSpaceCastN->getDestAddressSpace()); 5579 } 5580 5581 SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { 5582 SDValue InOp = N->getOperand(0); 5583 EVT InVT = InOp.getValueType(); 5584 EVT VT = N->getValueType(0); 5585 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 5586 SDLoc dl(N); 5587 5588 switch (getTypeAction(InVT)) { 5589 case TargetLowering::TypeLegal: 5590 break; 5591 case TargetLowering::TypeScalarizeScalableVector: 5592 report_fatal_error("Scalarization of scalable vectors is not supported."); 5593 case TargetLowering::TypePromoteInteger: { 5594 // If the incoming type is a vector that is being promoted, then 5595 // we know that the elements are arranged differently and that we 5596 // must perform the conversion using a stack slot. 5597 if (InVT.isVector()) 5598 break; 5599 5600 // If the InOp is promoted to the same size, convert it. 
Otherwise, 5601 // fall out of the switch and widen the promoted input. 5602 SDValue NInOp = GetPromotedInteger(InOp); 5603 EVT NInVT = NInOp.getValueType(); 5604 if (WidenVT.bitsEq(NInVT)) { 5605 // For big endian targets we need to shift the input integer or the 5606 // interesting bits will end up at the wrong place. 5607 if (DAG.getDataLayout().isBigEndian()) { 5608 unsigned ShiftAmt = NInVT.getSizeInBits() - InVT.getSizeInBits(); 5609 EVT ShiftAmtTy = TLI.getShiftAmountTy(NInVT, DAG.getDataLayout()); 5610 assert(ShiftAmt < WidenVT.getSizeInBits() && "Too large shift amount!"); 5611 NInOp = DAG.getNode(ISD::SHL, dl, NInVT, NInOp, 5612 DAG.getConstant(ShiftAmt, dl, ShiftAmtTy)); 5613 } 5614 return DAG.getNode(ISD::BITCAST, dl, WidenVT, NInOp); 5615 } 5616 InOp = NInOp; 5617 InVT = NInVT; 5618 break; 5619 } 5620 case TargetLowering::TypeSoftenFloat: 5621 case TargetLowering::TypePromoteFloat: 5622 case TargetLowering::TypeSoftPromoteHalf: 5623 case TargetLowering::TypeExpandInteger: 5624 case TargetLowering::TypeExpandFloat: 5625 case TargetLowering::TypeScalarizeVector: 5626 case TargetLowering::TypeSplitVector: 5627 break; 5628 case TargetLowering::TypeWidenVector: 5629 // If the InOp is widened to the same size, convert it. Otherwise, fall 5630 // out of the switch and widen the widened input. 5631 InOp = GetWidenedVector(InOp); 5632 InVT = InOp.getValueType(); 5633 if (WidenVT.bitsEq(InVT)) 5634 // The input widens to the same size. Convert to the widen value. 5635 return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp); 5636 break; 5637 } 5638 5639 unsigned WidenSize = WidenVT.getSizeInBits(); 5640 unsigned InSize = InVT.getSizeInBits(); 5641 unsigned InScalarSize = InVT.getScalarSizeInBits(); 5642 // x86mmx is not an acceptable vector element type, so don't try. 5643 if (WidenSize % InScalarSize == 0 && InVT != MVT::x86mmx) { 5644 // Determine new input vector type. The new input vector type will use 5645 // the same element type (if its a vector) or use the input type as a 5646 // vector. It is the same size as the type to widen to. 5647 EVT NewInVT; 5648 unsigned NewNumParts = WidenSize / InSize; 5649 if (InVT.isVector()) { 5650 EVT InEltVT = InVT.getVectorElementType(); 5651 NewInVT = EVT::getVectorVT(*DAG.getContext(), InEltVT, 5652 WidenSize / InEltVT.getSizeInBits()); 5653 } else { 5654 // For big endian systems, using the promoted input scalar type 5655 // to produce the scalar_to_vector would put the desired bits into 5656 // the least significant byte(s) of the wider element zero. This 5657 // will mean that the users of the result vector are using incorrect 5658 // bits. Use the original input type instead. Although either input 5659 // type can be used on little endian systems, for consistency we 5660 // use the original type there as well. 5661 EVT OrigInVT = N->getOperand(0).getValueType(); 5662 NewNumParts = WidenSize / OrigInVT.getSizeInBits(); 5663 NewInVT = EVT::getVectorVT(*DAG.getContext(), OrigInVT, NewNumParts); 5664 } 5665 5666 if (TLI.isTypeLegal(NewInVT)) { 5667 SDValue NewVec; 5668 if (InVT.isVector()) { 5669 // Because the result and the input are different vector types, widening 5670 // the result could create a legal type but widening the input might 5671 // make it an illegal type that might lead to repeatedly splitting the 5672 // input and then widening it. To avoid this, we widen the input only if 5673 // it results in a legal type. 
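// For example (illustrative): bitcasting v4i16 to v2i32 when the result
// widens to v4i32: the v4i16 input is concatenated with an undef v4i16 to
// make v8i16, which bitcasts directly to the v4i32 result.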
5674 if (WidenSize % InSize == 0) { 5675 SmallVector<SDValue, 16> Ops(NewNumParts, DAG.getUNDEF(InVT)); 5676 Ops[0] = InOp; 5677 5678 NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewInVT, Ops); 5679 } else { 5680 SmallVector<SDValue, 16> Ops; 5681 DAG.ExtractVectorElements(InOp, Ops); 5682 Ops.append(WidenSize / InScalarSize - Ops.size(), 5683 DAG.getUNDEF(InVT.getVectorElementType())); 5684 5685 NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl, NewInVT, Ops); 5686 } 5687 } else { 5688 NewVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewInVT, InOp); 5689 } 5690 return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec); 5691 } 5692 } 5693 5694 return CreateStackStoreLoad(InOp, WidenVT); 5695 } 5696 5697 SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { 5698 SDLoc dl(N); 5699 // Build a vector with undefined for the new nodes. 5700 EVT VT = N->getValueType(0); 5701 5702 // Integer BUILD_VECTOR operands may be larger than the node's vector element 5703 // type. The UNDEFs need to have the same type as the existing operands. 5704 EVT EltVT = N->getOperand(0).getValueType(); 5705 unsigned NumElts = VT.getVectorNumElements(); 5706 5707 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 5708 unsigned WidenNumElts = WidenVT.getVectorNumElements(); 5709 5710 SmallVector<SDValue, 16> NewOps(N->ops()); 5711 assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); 5712 NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); 5713 5714 return DAG.getBuildVector(WidenVT, dl, NewOps); 5715 } 5716 5717 SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { 5718 EVT InVT = N->getOperand(0).getValueType(); 5719 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5720 SDLoc dl(N); 5721 unsigned NumOperands = N->getNumOperands(); 5722 5723 bool InputWidened = false; // Indicates we need to widen the input. 5724 if (getTypeAction(InVT) != TargetLowering::TypeWidenVector) { 5725 unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); 5726 unsigned NumInElts = InVT.getVectorMinNumElements(); 5727 if (WidenNumElts % NumInElts == 0) { 5728 // Add undef vectors to widen to correct length. 5729 unsigned NumConcat = WidenNumElts / NumInElts; 5730 SDValue UndefVal = DAG.getUNDEF(InVT); 5731 SmallVector<SDValue, 16> Ops(NumConcat); 5732 for (unsigned i=0; i < NumOperands; ++i) 5733 Ops[i] = N->getOperand(i); 5734 for (unsigned i = NumOperands; i != NumConcat; ++i) 5735 Ops[i] = UndefVal; 5736 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Ops); 5737 } 5738 } else { 5739 InputWidened = true; 5740 if (WidenVT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { 5741 // The inputs and the result are widen to the same value. 5742 unsigned i; 5743 for (i=1; i < NumOperands; ++i) 5744 if (!N->getOperand(i).isUndef()) 5745 break; 5746 5747 if (i == NumOperands) 5748 // Everything but the first operand is an UNDEF so just return the 5749 // widened first operand. 5750 return GetWidenedVector(N->getOperand(0)); 5751 5752 if (NumOperands == 2) { 5753 assert(!WidenVT.isScalableVector() && 5754 "Cannot use vector shuffles to widen CONCAT_VECTOR result"); 5755 unsigned WidenNumElts = WidenVT.getVectorNumElements(); 5756 unsigned NumInElts = InVT.getVectorNumElements(); 5757 5758 // Replace concat of two operands with a shuffle. 
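// For example (illustrative): if the only legal f16 vector is v8f16, a v4f16
// concat of two v2f16 operands has both its inputs and its result widened to
// v8f16, and becomes a shuffle of the two widened inputs with mask
// <0, 1, 8, 9, -1, -1, -1, -1>.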
5759 SmallVector<int, 16> MaskOps(WidenNumElts, -1); 5760 for (unsigned i = 0; i < NumInElts; ++i) { 5761 MaskOps[i] = i; 5762 MaskOps[i + NumInElts] = i + WidenNumElts; 5763 } 5764 return DAG.getVectorShuffle(WidenVT, dl, 5765 GetWidenedVector(N->getOperand(0)), 5766 GetWidenedVector(N->getOperand(1)), 5767 MaskOps); 5768 } 5769 } 5770 } 5771 5772 assert(!WidenVT.isScalableVector() && 5773 "Cannot use build vectors to widen CONCAT_VECTOR result"); 5774 unsigned WidenNumElts = WidenVT.getVectorNumElements(); 5775 unsigned NumInElts = InVT.getVectorNumElements(); 5776 5777 // Fall back to use extracts and build vector. 5778 EVT EltVT = WidenVT.getVectorElementType(); 5779 SmallVector<SDValue, 16> Ops(WidenNumElts); 5780 unsigned Idx = 0; 5781 for (unsigned i=0; i < NumOperands; ++i) { 5782 SDValue InOp = N->getOperand(i); 5783 if (InputWidened) 5784 InOp = GetWidenedVector(InOp); 5785 for (unsigned j = 0; j < NumInElts; ++j) 5786 Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, 5787 DAG.getVectorIdxConstant(j, dl)); 5788 } 5789 SDValue UndefVal = DAG.getUNDEF(EltVT); 5790 for (; Idx < WidenNumElts; ++Idx) 5791 Ops[Idx] = UndefVal; 5792 return DAG.getBuildVector(WidenVT, dl, Ops); 5793 } 5794 5795 SDValue DAGTypeLegalizer::WidenVecRes_INSERT_SUBVECTOR(SDNode *N) { 5796 EVT VT = N->getValueType(0); 5797 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 5798 SDValue InOp1 = GetWidenedVector(N->getOperand(0)); 5799 SDValue InOp2 = N->getOperand(1); 5800 SDValue Idx = N->getOperand(2); 5801 SDLoc dl(N); 5802 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WidenVT, InOp1, InOp2, Idx); 5803 } 5804 5805 SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { 5806 EVT VT = N->getValueType(0); 5807 EVT EltVT = VT.getVectorElementType(); 5808 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 5809 SDValue InOp = N->getOperand(0); 5810 SDValue Idx = N->getOperand(1); 5811 SDLoc dl(N); 5812 5813 auto InOpTypeAction = getTypeAction(InOp.getValueType()); 5814 if (InOpTypeAction == TargetLowering::TypeWidenVector) 5815 InOp = GetWidenedVector(InOp); 5816 5817 EVT InVT = InOp.getValueType(); 5818 5819 // Check if we can just return the input vector after widening. 5820 uint64_t IdxVal = Idx->getAsZExtVal(); 5821 if (IdxVal == 0 && InVT == WidenVT) 5822 return InOp; 5823 5824 // Check if we can extract from the vector. 5825 unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); 5826 unsigned InNumElts = InVT.getVectorMinNumElements(); 5827 unsigned VTNumElts = VT.getVectorMinNumElements(); 5828 assert(IdxVal % VTNumElts == 0 && 5829 "Expected Idx to be a multiple of subvector minimum vector length"); 5830 if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts) 5831 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx); 5832 5833 if (VT.isScalableVector()) { 5834 // Try to split the operation up into smaller extracts and concat the 5835 // results together, e.g. 5836 // nxv6i64 extract_subvector(nxv12i64, 6) 5837 // <-> 5838 // nxv8i64 concat( 5839 // nxv2i64 extract_subvector(nxv16i64, 6) 5840 // nxv2i64 extract_subvector(nxv16i64, 8) 5841 // nxv2i64 extract_subvector(nxv16i64, 10) 5842 // undef) 5843 unsigned GCD = std::gcd(VTNumElts, WidenNumElts); 5844 assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " 5845 "down type's element count"); 5846 EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, 5847 ElementCount::getScalable(GCD)); 5848 // Avoid recursion around e.g. nxv1i8. 
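// (If PartVT itself needed widening, extracting PartVT pieces could re-enter
// this path; the check below sidesteps that, e.g. for nxv1i8.)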
5849 if (getTypeAction(PartVT) != TargetLowering::TypeWidenVector) { 5850 SmallVector<SDValue> Parts; 5851 unsigned I = 0; 5852 for (; I < VTNumElts / GCD; ++I) 5853 Parts.push_back( 5854 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, InOp, 5855 DAG.getVectorIdxConstant(IdxVal + I * GCD, dl))); 5856 for (; I < WidenNumElts / GCD; ++I) 5857 Parts.push_back(DAG.getUNDEF(PartVT)); 5858 5859 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); 5860 } 5861 5862 report_fatal_error("Don't know how to widen the result of " 5863 "EXTRACT_SUBVECTOR for scalable vectors"); 5864 } 5865 5866 // We could try widening the input to the right length but for now, extract 5867 // the original elements, fill the rest with undefs and build a vector. 5868 SmallVector<SDValue, 16> Ops(WidenNumElts); 5869 unsigned i; 5870 for (i = 0; i < VTNumElts; ++i) 5871 Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, 5872 DAG.getVectorIdxConstant(IdxVal + i, dl)); 5873 5874 SDValue UndefVal = DAG.getUNDEF(EltVT); 5875 for (; i < WidenNumElts; ++i) 5876 Ops[i] = UndefVal; 5877 return DAG.getBuildVector(WidenVT, dl, Ops); 5878 } 5879 5880 SDValue DAGTypeLegalizer::WidenVecRes_AssertZext(SDNode *N) { 5881 SDValue InOp = ModifyToType( 5882 N->getOperand(0), 5883 TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), true); 5884 return DAG.getNode(ISD::AssertZext, SDLoc(N), InOp.getValueType(), InOp, 5885 N->getOperand(1)); 5886 } 5887 5888 SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { 5889 SDValue InOp = GetWidenedVector(N->getOperand(0)); 5890 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), 5891 InOp.getValueType(), InOp, 5892 N->getOperand(1), N->getOperand(2)); 5893 } 5894 5895 SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { 5896 LoadSDNode *LD = cast<LoadSDNode>(N); 5897 ISD::LoadExtType ExtType = LD->getExtensionType(); 5898 5899 // A vector must always be stored in memory as-is, i.e. without any padding 5900 // between the elements, since various code depend on it, e.g. in the 5901 // handling of a bitcast of a vector type to int, which may be done with a 5902 // vector store followed by an integer load. A vector that does not have 5903 // elements that are byte-sized must therefore be stored as an integer 5904 // built out of the extracted vector elements. 5905 if (!LD->getMemoryVT().isByteSized()) { 5906 SDValue Value, NewChain; 5907 std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG); 5908 ReplaceValueWith(SDValue(LD, 0), Value); 5909 ReplaceValueWith(SDValue(LD, 1), NewChain); 5910 return SDValue(); 5911 } 5912 5913 // Generate a vector-predicated load if it is custom/legal on the target. To 5914 // avoid possible recursion, only do this if the widened mask type is legal. 5915 // FIXME: Not all targets may support EVL in VP_LOAD. These will have been 5916 // removed from the IR by the ExpandVectorPredication pass but we're 5917 // reintroducing them here. 
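// For example (illustrative): an illegal v3i32 load on a VP-capable target
// becomes a v4i32 VP_LOAD with an all-ones v4i1 mask and EVL = 3, so no
// memory beyond the original three elements is touched.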
5918 EVT LdVT = LD->getMemoryVT(); 5919 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT); 5920 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 5921 WideVT.getVectorElementCount()); 5922 if (ExtType == ISD::NON_EXTLOAD && 5923 TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) && 5924 TLI.isTypeLegal(WideMaskVT)) { 5925 SDLoc DL(N); 5926 SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); 5927 SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(), 5928 LdVT.getVectorElementCount()); 5929 SDValue NewLoad = 5930 DAG.getLoadVP(LD->getAddressingMode(), ISD::NON_EXTLOAD, WideVT, DL, 5931 LD->getChain(), LD->getBasePtr(), LD->getOffset(), Mask, 5932 EVL, LD->getMemoryVT(), LD->getMemOperand()); 5933 5934 // Modified the chain - switch anything that used the old chain to use 5935 // the new one. 5936 ReplaceValueWith(SDValue(N, 1), NewLoad.getValue(1)); 5937 5938 return NewLoad; 5939 } 5940 5941 SDValue Result; 5942 SmallVector<SDValue, 16> LdChain; // Chain for the series of load 5943 if (ExtType != ISD::NON_EXTLOAD) 5944 Result = GenWidenVectorExtLoads(LdChain, LD, ExtType); 5945 else 5946 Result = GenWidenVectorLoads(LdChain, LD); 5947 5948 if (Result) { 5949 // If we generate a single load, we can use that for the chain. Otherwise, 5950 // build a factor node to remember the multiple loads are independent and 5951 // chain to that. 5952 SDValue NewChain; 5953 if (LdChain.size() == 1) 5954 NewChain = LdChain[0]; 5955 else 5956 NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain); 5957 5958 // Modified the chain - switch anything that used the old chain to use 5959 // the new one. 5960 ReplaceValueWith(SDValue(N, 1), NewChain); 5961 5962 return Result; 5963 } 5964 5965 report_fatal_error("Unable to widen vector load"); 5966 } 5967 5968 SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) { 5969 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 5970 SDValue Mask = N->getMask(); 5971 SDValue EVL = N->getVectorLength(); 5972 ISD::LoadExtType ExtType = N->getExtensionType(); 5973 SDLoc dl(N); 5974 5975 // The mask should be widened as well 5976 assert(getTypeAction(Mask.getValueType()) == 5977 TargetLowering::TypeWidenVector && 5978 "Unable to widen binary VP op"); 5979 Mask = GetWidenedVector(Mask); 5980 assert(Mask.getValueType().getVectorElementCount() == 5981 TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType()) 5982 .getVectorElementCount() && 5983 "Unable to widen vector load"); 5984 5985 SDValue Res = 5986 DAG.getLoadVP(N->getAddressingMode(), ExtType, WidenVT, dl, N->getChain(), 5987 N->getBasePtr(), N->getOffset(), Mask, EVL, 5988 N->getMemoryVT(), N->getMemOperand(), N->isExpandingLoad()); 5989 // Legalize the chain result - switch anything that used the old chain to 5990 // use the new one. 
5991 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 5992 return Res; 5993 } 5994 5995 SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) { 5996 SDLoc DL(N); 5997 5998 // The mask should be widened as well 5999 SDValue Mask = N->getMask(); 6000 assert(getTypeAction(Mask.getValueType()) == 6001 TargetLowering::TypeWidenVector && 6002 "Unable to widen VP strided load"); 6003 Mask = GetWidenedVector(Mask); 6004 6005 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 6006 assert(Mask.getValueType().getVectorElementCount() == 6007 WidenVT.getVectorElementCount() && 6008 "Data and mask vectors should have the same number of elements"); 6009 6010 SDValue Res = DAG.getStridedLoadVP( 6011 N->getAddressingMode(), N->getExtensionType(), WidenVT, DL, N->getChain(), 6012 N->getBasePtr(), N->getOffset(), N->getStride(), Mask, 6013 N->getVectorLength(), N->getMemoryVT(), N->getMemOperand(), 6014 N->isExpandingLoad()); 6015 6016 // Legalize the chain result - switch anything that used the old chain to 6017 // use the new one. 6018 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 6019 return Res; 6020 } 6021 6022 SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_COMPRESS(SDNode *N) { 6023 SDValue Vec = N->getOperand(0); 6024 SDValue Mask = N->getOperand(1); 6025 SDValue Passthru = N->getOperand(2); 6026 EVT WideVecVT = 6027 TLI.getTypeToTransformTo(*DAG.getContext(), Vec.getValueType()); 6028 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), 6029 Mask.getValueType().getVectorElementType(), 6030 WideVecVT.getVectorElementCount()); 6031 6032 SDValue WideVec = ModifyToType(Vec, WideVecVT); 6033 SDValue WideMask = ModifyToType(Mask, WideMaskVT, /*FillWithZeroes=*/true); 6034 SDValue WidePassthru = ModifyToType(Passthru, WideVecVT); 6035 return DAG.getNode(ISD::VECTOR_COMPRESS, SDLoc(N), WideVecVT, WideVec, 6036 WideMask, WidePassthru); 6037 } 6038 6039 SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) { 6040 6041 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),N->getValueType(0)); 6042 SDValue Mask = N->getMask(); 6043 EVT MaskVT = Mask.getValueType(); 6044 SDValue PassThru = GetWidenedVector(N->getPassThru()); 6045 ISD::LoadExtType ExtType = N->getExtensionType(); 6046 SDLoc dl(N); 6047 6048 // The mask should be widened as well 6049 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), 6050 MaskVT.getVectorElementType(), 6051 WidenVT.getVectorNumElements()); 6052 Mask = ModifyToType(Mask, WideMaskVT, true); 6053 6054 SDValue Res = DAG.getMaskedLoad( 6055 WidenVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, 6056 PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), 6057 ExtType, N->isExpandingLoad()); 6058 // Legalize the chain result - switch anything that used the old chain to 6059 // use the new one. 
6060 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 6061 return Res; 6062 } 6063 6064 SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) { 6065 6066 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 6067 SDValue Mask = N->getMask(); 6068 EVT MaskVT = Mask.getValueType(); 6069 SDValue PassThru = GetWidenedVector(N->getPassThru()); 6070 SDValue Scale = N->getScale(); 6071 unsigned NumElts = WideVT.getVectorNumElements(); 6072 SDLoc dl(N); 6073 6074 // The mask should be widened as well 6075 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), 6076 MaskVT.getVectorElementType(), 6077 WideVT.getVectorNumElements()); 6078 Mask = ModifyToType(Mask, WideMaskVT, true); 6079 6080 // Widen the Index operand 6081 SDValue Index = N->getIndex(); 6082 EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), 6083 Index.getValueType().getScalarType(), 6084 NumElts); 6085 Index = ModifyToType(Index, WideIndexVT); 6086 SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index, 6087 Scale }; 6088 6089 // Widen the MemoryType 6090 EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(), 6091 N->getMemoryVT().getScalarType(), NumElts); 6092 SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other), 6093 WideMemVT, dl, Ops, N->getMemOperand(), 6094 N->getIndexType(), N->getExtensionType()); 6095 6096 // Legalize the chain result - switch anything that used the old chain to 6097 // use the new one. 6098 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 6099 return Res; 6100 } 6101 6102 SDValue DAGTypeLegalizer::WidenVecRes_VP_GATHER(VPGatherSDNode *N) { 6103 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 6104 SDValue Mask = N->getMask(); 6105 SDValue Scale = N->getScale(); 6106 ElementCount WideEC = WideVT.getVectorElementCount(); 6107 SDLoc dl(N); 6108 6109 SDValue Index = GetWidenedVector(N->getIndex()); 6110 EVT WideMemVT = EVT::getVectorVT(*DAG.getContext(), 6111 N->getMemoryVT().getScalarType(), WideEC); 6112 Mask = GetWidenedMask(Mask, WideEC); 6113 6114 SDValue Ops[] = {N->getChain(), N->getBasePtr(), Index, Scale, 6115 Mask, N->getVectorLength()}; 6116 SDValue Res = DAG.getGatherVP(DAG.getVTList(WideVT, MVT::Other), WideMemVT, 6117 dl, Ops, N->getMemOperand(), N->getIndexType()); 6118 6119 // Legalize the chain result - switch anything that used the old chain to 6120 // use the new one. 6121 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 6122 return Res; 6123 } 6124 6125 SDValue DAGTypeLegalizer::WidenVecRes_ScalarOp(SDNode *N) { 6126 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 6127 if (N->isVPOpcode()) 6128 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0), 6129 N->getOperand(1), N->getOperand(2)); 6130 return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, N->getOperand(0)); 6131 } 6132 6133 // Return true is this is a SETCC node or a strict version of it. 6134 static inline bool isSETCCOp(unsigned Opcode) { 6135 switch (Opcode) { 6136 case ISD::SETCC: 6137 case ISD::STRICT_FSETCC: 6138 case ISD::STRICT_FSETCCS: 6139 return true; 6140 } 6141 return false; 6142 } 6143 6144 // Return true if this is a node that could have two SETCCs as operands. 6145 static inline bool isLogicalMaskOp(unsigned Opcode) { 6146 switch (Opcode) { 6147 case ISD::AND: 6148 case ISD::OR: 6149 case ISD::XOR: 6150 return true; 6151 } 6152 return false; 6153 } 6154 6155 // If N is a SETCC or a strict variant of it, return the type 6156 // of the compare operands. 
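// (For the strict variants operand 0 is the chain, so the first compare
// operand is operand 1.)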
static inline EVT getSETCCOperandType(SDValue N) {
  unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
  return N->getOperand(OpNo).getValueType();
}

// This is used just for the assert in convertMask(). Check that this is either
// a SETCC or a SETCC previously handled by convertMask().
#ifndef NDEBUG
static inline bool isSETCCorConvertedSETCC(SDValue N) {
  if (N.getOpcode() == ISD::EXTRACT_SUBVECTOR)
    N = N.getOperand(0);
  else if (N.getOpcode() == ISD::CONCAT_VECTORS) {
    for (unsigned i = 1; i < N->getNumOperands(); ++i)
      if (!N->getOperand(i)->isUndef())
        return false;
    N = N.getOperand(0);
  }

  if (N.getOpcode() == ISD::TRUNCATE)
    N = N.getOperand(0);
  else if (N.getOpcode() == ISD::SIGN_EXTEND)
    N = N.getOperand(0);

  if (isLogicalMaskOp(N.getOpcode()))
    return isSETCCorConvertedSETCC(N.getOperand(0)) &&
           isSETCCorConvertedSETCC(N.getOperand(1));

  return (isSETCCOp(N.getOpcode()) ||
          ISD::isBuildVectorOfConstantSDNodes(N.getNode()));
}
#endif

// Return a mask of vector type MaskVT to replace InMask. Also adjust MaskVT
// to ToMaskVT if needed with vector extension or truncation.
SDValue DAGTypeLegalizer::convertMask(SDValue InMask, EVT MaskVT,
                                      EVT ToMaskVT) {
  // Currently only a SETCC or an AND/OR/XOR with two SETCCs is handled.
  // FIXME: This code seems to be too restrictive; we might consider
  // generalizing it or dropping it.
  assert(isSETCCorConvertedSETCC(InMask) && "Unexpected mask argument.");

  // Make a new Mask node, with a legal result VT.
  SDValue Mask;
  SmallVector<SDValue, 4> Ops;
  for (unsigned i = 0, e = InMask->getNumOperands(); i < e; ++i)
    Ops.push_back(InMask->getOperand(i));
  if (InMask->isStrictFPOpcode()) {
    Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask),
                       { MaskVT, MVT::Other }, Ops);
    ReplaceValueWith(InMask.getValue(1), Mask.getValue(1));
  }
  else
    Mask = DAG.getNode(InMask->getOpcode(), SDLoc(InMask), MaskVT, Ops);

  // If MaskVT has smaller or bigger elements than ToMaskVT, a vector sign
  // extend or truncate is needed.
  LLVMContext &Ctx = *DAG.getContext();
  unsigned MaskScalarBits = MaskVT.getScalarSizeInBits();
  unsigned ToMaskScalBits = ToMaskVT.getScalarSizeInBits();
  if (MaskScalarBits < ToMaskScalBits) {
    EVT ExtVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
                                 MaskVT.getVectorNumElements());
    Mask = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Mask), ExtVT, Mask);
  } else if (MaskScalarBits > ToMaskScalBits) {
    EVT TruncVT = EVT::getVectorVT(Ctx, ToMaskVT.getVectorElementType(),
                                   MaskVT.getVectorNumElements());
    Mask = DAG.getNode(ISD::TRUNCATE, SDLoc(Mask), TruncVT, Mask);
  }

  assert(Mask->getValueType(0).getScalarSizeInBits() ==
             ToMaskVT.getScalarSizeInBits() &&
         "Mask should have the right element size by now.");

  // Adjust Mask to the right number of elements.
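  // If there are too many elements, extract the low ToMaskVT-sized subvector;
  // if there are too few, pad the mask out with undef subvectors via
  // CONCAT_VECTORS.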
  unsigned CurrMaskNumEls = Mask->getValueType(0).getVectorNumElements();
  if (CurrMaskNumEls > ToMaskVT.getVectorNumElements()) {
    SDValue ZeroIdx = DAG.getVectorIdxConstant(0, SDLoc(Mask));
    Mask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Mask), ToMaskVT, Mask,
                       ZeroIdx);
  } else if (CurrMaskNumEls < ToMaskVT.getVectorNumElements()) {
    unsigned NumSubVecs = (ToMaskVT.getVectorNumElements() / CurrMaskNumEls);
    EVT SubVT = Mask->getValueType(0);
    SmallVector<SDValue, 16> SubOps(NumSubVecs, DAG.getUNDEF(SubVT));
    SubOps[0] = Mask;
    Mask = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(Mask), ToMaskVT, SubOps);
  }

  assert((Mask->getValueType(0) == ToMaskVT) &&
         "A mask of ToMaskVT should have been produced by now.");

  return Mask;
}

// This method tries to handle some special cases for the vselect mask
// and, if needed, adjusts the mask vector type to match that of the VSELECT.
// Without it, many cases end up scalarizing the SETCC, with many
// unnecessary instructions.
SDValue DAGTypeLegalizer::WidenVSELECTMask(SDNode *N) {
  LLVMContext &Ctx = *DAG.getContext();
  SDValue Cond = N->getOperand(0);

  if (N->getOpcode() != ISD::VSELECT)
    return SDValue();

  if (!isSETCCOp(Cond->getOpcode()) && !isLogicalMaskOp(Cond->getOpcode()))
    return SDValue();

  // If this is a split VSELECT that was already handled previously, do
  // nothing.
  EVT CondVT = Cond->getValueType(0);
  if (CondVT.getScalarSizeInBits() != 1)
    return SDValue();

  EVT VSelVT = N->getValueType(0);

  // This method can't handle scalable vector types.
  // FIXME: This support could be added in the future.
  if (VSelVT.isScalableVector())
    return SDValue();

  // Only handle vector types which are a power of 2.
  if (!isPowerOf2_64(VSelVT.getSizeInBits()))
    return SDValue();

  // Don't touch if this will be scalarized.
  EVT FinalVT = VSelVT;
  while (getTypeAction(FinalVT) == TargetLowering::TypeSplitVector)
    FinalVT = FinalVT.getHalfNumVectorElementsVT(Ctx);

  if (FinalVT.getVectorNumElements() == 1)
    return SDValue();

  // If there is support for an i1 vector mask, don't touch.
  if (isSETCCOp(Cond.getOpcode())) {
    EVT SetCCOpVT = getSETCCOperandType(Cond);
    while (TLI.getTypeAction(Ctx, SetCCOpVT) != TargetLowering::TypeLegal)
      SetCCOpVT = TLI.getTypeToTransformTo(Ctx, SetCCOpVT);
    EVT SetCCResVT = getSetCCResultType(SetCCOpVT);
    if (SetCCResVT.getScalarSizeInBits() == 1)
      return SDValue();
  } else if (CondVT.getScalarType() == MVT::i1) {
    // If there is support for an i1 vector mask (or only scalar i1
    // conditions), don't touch.
    while (TLI.getTypeAction(Ctx, CondVT) != TargetLowering::TypeLegal)
      CondVT = TLI.getTypeToTransformTo(Ctx, CondVT);

    if (CondVT.getScalarType() == MVT::i1)
      return SDValue();
  }

  // Widen the vselect result type if needed.
  if (getTypeAction(VSelVT) == TargetLowering::TypeWidenVector)
    VSelVT = TLI.getTypeToTransformTo(Ctx, VSelVT);

  // The mask of the VSELECT should have integer elements.
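  // (For example, a v4f32 VSELECT gets a v4i32 mask type.)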
6312 EVT ToMaskVT = VSelVT; 6313 if (!ToMaskVT.getScalarType().isInteger()) 6314 ToMaskVT = ToMaskVT.changeVectorElementTypeToInteger(); 6315 6316 SDValue Mask; 6317 if (isSETCCOp(Cond->getOpcode())) { 6318 EVT MaskVT = getSetCCResultType(getSETCCOperandType(Cond)); 6319 Mask = convertMask(Cond, MaskVT, ToMaskVT); 6320 } else if (isLogicalMaskOp(Cond->getOpcode()) && 6321 isSETCCOp(Cond->getOperand(0).getOpcode()) && 6322 isSETCCOp(Cond->getOperand(1).getOpcode())) { 6323 // Cond is (AND/OR/XOR (SETCC, SETCC)) 6324 SDValue SETCC0 = Cond->getOperand(0); 6325 SDValue SETCC1 = Cond->getOperand(1); 6326 EVT VT0 = getSetCCResultType(getSETCCOperandType(SETCC0)); 6327 EVT VT1 = getSetCCResultType(getSETCCOperandType(SETCC1)); 6328 unsigned ScalarBits0 = VT0.getScalarSizeInBits(); 6329 unsigned ScalarBits1 = VT1.getScalarSizeInBits(); 6330 unsigned ScalarBits_ToMask = ToMaskVT.getScalarSizeInBits(); 6331 EVT MaskVT; 6332 // If the two SETCCs have different VTs, either extend/truncate one of 6333 // them to the other "towards" ToMaskVT, or truncate one and extend the 6334 // other to ToMaskVT. 6335 if (ScalarBits0 != ScalarBits1) { 6336 EVT NarrowVT = ((ScalarBits0 < ScalarBits1) ? VT0 : VT1); 6337 EVT WideVT = ((NarrowVT == VT0) ? VT1 : VT0); 6338 if (ScalarBits_ToMask >= WideVT.getScalarSizeInBits()) 6339 MaskVT = WideVT; 6340 else if (ScalarBits_ToMask <= NarrowVT.getScalarSizeInBits()) 6341 MaskVT = NarrowVT; 6342 else 6343 MaskVT = ToMaskVT; 6344 } else 6345 // If the two SETCCs have the same VT, don't change it. 6346 MaskVT = VT0; 6347 6348 // Make new SETCCs and logical nodes. 6349 SETCC0 = convertMask(SETCC0, VT0, MaskVT); 6350 SETCC1 = convertMask(SETCC1, VT1, MaskVT); 6351 Cond = DAG.getNode(Cond->getOpcode(), SDLoc(Cond), MaskVT, SETCC0, SETCC1); 6352 6353 // Convert the logical op for VSELECT if needed. 6354 Mask = convertMask(Cond, MaskVT, ToMaskVT); 6355 } else 6356 return SDValue(); 6357 6358 return Mask; 6359 } 6360 6361 SDValue DAGTypeLegalizer::WidenVecRes_Select(SDNode *N) { 6362 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 6363 ElementCount WidenEC = WidenVT.getVectorElementCount(); 6364 6365 SDValue Cond1 = N->getOperand(0); 6366 EVT CondVT = Cond1.getValueType(); 6367 unsigned Opcode = N->getOpcode(); 6368 if (CondVT.isVector()) { 6369 if (SDValue WideCond = WidenVSELECTMask(N)) { 6370 SDValue InOp1 = GetWidenedVector(N->getOperand(1)); 6371 SDValue InOp2 = GetWidenedVector(N->getOperand(2)); 6372 assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); 6373 return DAG.getNode(Opcode, SDLoc(N), WidenVT, WideCond, InOp1, InOp2); 6374 } 6375 6376 EVT CondEltVT = CondVT.getVectorElementType(); 6377 EVT CondWidenVT = EVT::getVectorVT(*DAG.getContext(), CondEltVT, WidenEC); 6378 if (getTypeAction(CondVT) == TargetLowering::TypeWidenVector) 6379 Cond1 = GetWidenedVector(Cond1); 6380 6381 // If we have to split the condition there is no point in widening the 6382 // select. This would result in an cycle of widening the select -> 6383 // widening the condition operand -> splitting the condition operand -> 6384 // splitting the select -> widening the select. Instead split this select 6385 // further and widen the resulting type. 
6386 if (getTypeAction(CondVT) == TargetLowering::TypeSplitVector) { 6387 SDValue SplitSelect = SplitVecOp_VSELECT(N, 0); 6388 SDValue Res = ModifyToType(SplitSelect, WidenVT); 6389 return Res; 6390 } 6391 6392 if (Cond1.getValueType() != CondWidenVT) 6393 Cond1 = ModifyToType(Cond1, CondWidenVT); 6394 } 6395 6396 SDValue InOp1 = GetWidenedVector(N->getOperand(1)); 6397 SDValue InOp2 = GetWidenedVector(N->getOperand(2)); 6398 assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); 6399 if (Opcode == ISD::VP_SELECT || Opcode == ISD::VP_MERGE) 6400 return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2, 6401 N->getOperand(3)); 6402 return DAG.getNode(Opcode, SDLoc(N), WidenVT, Cond1, InOp1, InOp2); 6403 } 6404 6405 SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { 6406 SDValue InOp1 = GetWidenedVector(N->getOperand(2)); 6407 SDValue InOp2 = GetWidenedVector(N->getOperand(3)); 6408 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), 6409 InOp1.getValueType(), N->getOperand(0), 6410 N->getOperand(1), InOp1, InOp2, N->getOperand(4)); 6411 } 6412 6413 SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { 6414 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 6415 return DAG.getUNDEF(WidenVT); 6416 } 6417 6418 SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { 6419 EVT VT = N->getValueType(0); 6420 SDLoc dl(N); 6421 6422 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 6423 unsigned NumElts = VT.getVectorNumElements(); 6424 unsigned WidenNumElts = WidenVT.getVectorNumElements(); 6425 6426 SDValue InOp1 = GetWidenedVector(N->getOperand(0)); 6427 SDValue InOp2 = GetWidenedVector(N->getOperand(1)); 6428 6429 // Adjust mask based on new input vector length. 
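  // For example, a v2i32 shuffle with mask <1, 3> widened to v4i32 becomes
  // mask <1, 5, -1, -1>: index 3 refers to the second input, so it is
  // remapped to 3 - 2 + 4 = 5, and the extra lanes are left undefined.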
6430 SmallVector<int, 16> NewMask(WidenNumElts, -1); 6431 for (unsigned i = 0; i != NumElts; ++i) { 6432 int Idx = N->getMaskElt(i); 6433 if (Idx < (int)NumElts) 6434 NewMask[i] = Idx; 6435 else 6436 NewMask[i] = Idx - NumElts + WidenNumElts; 6437 } 6438 return DAG.getVectorShuffle(WidenVT, dl, InOp1, InOp2, NewMask); 6439 } 6440 6441 SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_REVERSE(SDNode *N) { 6442 EVT VT = N->getValueType(0); 6443 EVT EltVT = VT.getVectorElementType(); 6444 SDLoc dl(N); 6445 6446 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 6447 SDValue OpValue = GetWidenedVector(N->getOperand(0)); 6448 assert(WidenVT == OpValue.getValueType() && "Unexpected widened vector type"); 6449 6450 SDValue ReverseVal = DAG.getNode(ISD::VECTOR_REVERSE, dl, WidenVT, OpValue); 6451 unsigned WidenNumElts = WidenVT.getVectorMinNumElements(); 6452 unsigned VTNumElts = VT.getVectorMinNumElements(); 6453 unsigned IdxVal = WidenNumElts - VTNumElts; 6454 6455 if (VT.isScalableVector()) { 6456 // Try to split the 'Widen ReverseVal' into smaller extracts and concat the 6457 // results together, e.g.(nxv6i64 -> nxv8i64) 6458 // nxv8i64 vector_reverse 6459 // <-> 6460 // nxv8i64 concat( 6461 // nxv2i64 extract_subvector(nxv8i64, 2) 6462 // nxv2i64 extract_subvector(nxv8i64, 4) 6463 // nxv2i64 extract_subvector(nxv8i64, 6) 6464 // nxv2i64 undef) 6465 6466 unsigned GCD = std::gcd(VTNumElts, WidenNumElts); 6467 EVT PartVT = EVT::getVectorVT(*DAG.getContext(), EltVT, 6468 ElementCount::getScalable(GCD)); 6469 assert((IdxVal % GCD) == 0 && "Expected Idx to be a multiple of the broken " 6470 "down type's element count"); 6471 SmallVector<SDValue> Parts; 6472 unsigned i = 0; 6473 for (; i < VTNumElts / GCD; ++i) 6474 Parts.push_back( 6475 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, PartVT, ReverseVal, 6476 DAG.getVectorIdxConstant(IdxVal + i * GCD, dl))); 6477 for (; i < WidenNumElts / GCD; ++i) 6478 Parts.push_back(DAG.getUNDEF(PartVT)); 6479 6480 return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, Parts); 6481 } 6482 6483 // Use VECTOR_SHUFFLE to combine new vector from 'ReverseVal' for 6484 // fixed-vectors. 6485 SmallVector<int, 16> Mask(WidenNumElts, -1); 6486 std::iota(Mask.begin(), Mask.begin() + VTNumElts, IdxVal); 6487 6488 return DAG.getVectorShuffle(WidenVT, dl, ReverseVal, DAG.getUNDEF(WidenVT), 6489 Mask); 6490 } 6491 6492 SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { 6493 assert(N->getValueType(0).isVector() && 6494 N->getOperand(0).getValueType().isVector() && 6495 "Operands must be vectors"); 6496 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); 6497 ElementCount WidenEC = WidenVT.getVectorElementCount(); 6498 6499 SDValue InOp1 = N->getOperand(0); 6500 EVT InVT = InOp1.getValueType(); 6501 assert(InVT.isVector() && "can not widen non-vector type"); 6502 EVT WidenInVT = 6503 EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), WidenEC); 6504 6505 // The input and output types often differ here, and it could be that while 6506 // we'd prefer to widen the result type, the input operands have been split. 6507 // In this case, we also need to split the result of this node as well. 6508 if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { 6509 SDValue SplitVSetCC = SplitVecOp_VSETCC(N); 6510 SDValue Res = ModifyToType(SplitVSetCC, WidenVT); 6511 return Res; 6512 } 6513 6514 // If the inputs also widen, handle them directly. Otherwise widen by hand. 
6515 SDValue InOp2 = N->getOperand(1); 6516 if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) { 6517 InOp1 = GetWidenedVector(InOp1); 6518 InOp2 = GetWidenedVector(InOp2); 6519 } else { 6520 InOp1 = DAG.WidenVector(InOp1, SDLoc(N)); 6521 InOp2 = DAG.WidenVector(InOp2, SDLoc(N)); 6522 } 6523 6524 // Assume that the input and output will be widen appropriately. If not, 6525 // we will have to unroll it at some point. 6526 assert(InOp1.getValueType() == WidenInVT && 6527 InOp2.getValueType() == WidenInVT && 6528 "Input not widened to expected type!"); 6529 (void)WidenInVT; 6530 if (N->getOpcode() == ISD::VP_SETCC) { 6531 SDValue Mask = 6532 GetWidenedMask(N->getOperand(3), WidenVT.getVectorElementCount()); 6533 return DAG.getNode(ISD::VP_SETCC, SDLoc(N), WidenVT, InOp1, InOp2, 6534 N->getOperand(2), Mask, N->getOperand(4)); 6535 } 6536 return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2, 6537 N->getOperand(2)); 6538 } 6539 6540 SDValue DAGTypeLegalizer::WidenVecRes_STRICT_FSETCC(SDNode *N) { 6541 assert(N->getValueType(0).isVector() && 6542 N->getOperand(1).getValueType().isVector() && 6543 "Operands must be vectors"); 6544 EVT VT = N->getValueType(0); 6545 EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); 6546 unsigned WidenNumElts = WidenVT.getVectorNumElements(); 6547 unsigned NumElts = VT.getVectorNumElements(); 6548 EVT EltVT = VT.getVectorElementType(); 6549 6550 SDLoc dl(N); 6551 SDValue Chain = N->getOperand(0); 6552 SDValue LHS = N->getOperand(1); 6553 SDValue RHS = N->getOperand(2); 6554 SDValue CC = N->getOperand(3); 6555 EVT TmpEltVT = LHS.getValueType().getVectorElementType(); 6556 6557 // Fully unroll and reassemble. 6558 SmallVector<SDValue, 8> Scalars(WidenNumElts, DAG.getUNDEF(EltVT)); 6559 SmallVector<SDValue, 8> Chains(NumElts); 6560 for (unsigned i = 0; i != NumElts; ++i) { 6561 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, 6562 DAG.getVectorIdxConstant(i, dl)); 6563 SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, 6564 DAG.getVectorIdxConstant(i, dl)); 6565 6566 Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, 6567 {Chain, LHSElem, RHSElem, CC}); 6568 Chains[i] = Scalars[i].getValue(1); 6569 Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i], 6570 DAG.getBoolConstant(true, dl, EltVT, VT), 6571 DAG.getBoolConstant(false, dl, EltVT, VT)); 6572 } 6573 6574 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); 6575 ReplaceValueWith(SDValue(N, 1), NewChain); 6576 6577 return DAG.getBuildVector(WidenVT, dl, Scalars); 6578 } 6579 6580 //===----------------------------------------------------------------------===// 6581 // Widen Vector Operand 6582 //===----------------------------------------------------------------------===// 6583 bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { 6584 LLVM_DEBUG(dbgs() << "Widen node operand " << OpNo << ": "; N->dump(&DAG)); 6585 SDValue Res = SDValue(); 6586 6587 // See if the target wants to custom widen this node. 
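  // If it does, CustomLowerNode has already registered the replacement
  // results and there is nothing more to do here.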
6588 if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) 6589 return false; 6590 6591 switch (N->getOpcode()) { 6592 default: 6593 #ifndef NDEBUG 6594 dbgs() << "WidenVectorOperand op #" << OpNo << ": "; 6595 N->dump(&DAG); 6596 dbgs() << "\n"; 6597 #endif 6598 report_fatal_error("Do not know how to widen this operator's operand!"); 6599 6600 case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break; 6601 case ISD::FAKE_USE: 6602 Res = WidenVecOp_FAKE_USE(N); 6603 break; 6604 case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break; 6605 case ISD::INSERT_SUBVECTOR: Res = WidenVecOp_INSERT_SUBVECTOR(N); break; 6606 case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; 6607 case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break; 6608 case ISD::STORE: Res = WidenVecOp_STORE(N); break; 6609 case ISD::VP_STORE: Res = WidenVecOp_VP_STORE(N, OpNo); break; 6610 case ISD::EXPERIMENTAL_VP_STRIDED_STORE: 6611 Res = WidenVecOp_VP_STRIDED_STORE(N, OpNo); 6612 break; 6613 case ISD::ANY_EXTEND_VECTOR_INREG: 6614 case ISD::SIGN_EXTEND_VECTOR_INREG: 6615 case ISD::ZERO_EXTEND_VECTOR_INREG: 6616 Res = WidenVecOp_EXTEND_VECTOR_INREG(N); 6617 break; 6618 case ISD::MSTORE: Res = WidenVecOp_MSTORE(N, OpNo); break; 6619 case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break; 6620 case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break; 6621 case ISD::VP_SCATTER: Res = WidenVecOp_VP_SCATTER(N, OpNo); break; 6622 case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; 6623 case ISD::STRICT_FSETCC: 6624 case ISD::STRICT_FSETCCS: Res = WidenVecOp_STRICT_FSETCC(N); break; 6625 case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break; 6626 case ISD::FLDEXP: 6627 case ISD::FCOPYSIGN: 6628 case ISD::LROUND: 6629 case ISD::LLROUND: 6630 case ISD::LRINT: 6631 case ISD::LLRINT: 6632 Res = WidenVecOp_UnrollVectorOp(N); 6633 break; 6634 case ISD::IS_FPCLASS: Res = WidenVecOp_IS_FPCLASS(N); break; 6635 6636 case ISD::ANY_EXTEND: 6637 case ISD::SIGN_EXTEND: 6638 case ISD::ZERO_EXTEND: 6639 Res = WidenVecOp_EXTEND(N); 6640 break; 6641 6642 case ISD::SCMP: 6643 case ISD::UCMP: 6644 Res = WidenVecOp_CMP(N); 6645 break; 6646 6647 case ISD::FP_EXTEND: 6648 case ISD::STRICT_FP_EXTEND: 6649 case ISD::FP_ROUND: 6650 case ISD::STRICT_FP_ROUND: 6651 case ISD::FP_TO_SINT: 6652 case ISD::STRICT_FP_TO_SINT: 6653 case ISD::FP_TO_UINT: 6654 case ISD::STRICT_FP_TO_UINT: 6655 case ISD::SINT_TO_FP: 6656 case ISD::STRICT_SINT_TO_FP: 6657 case ISD::UINT_TO_FP: 6658 case ISD::STRICT_UINT_TO_FP: 6659 case ISD::TRUNCATE: 6660 Res = WidenVecOp_Convert(N); 6661 break; 6662 6663 case ISD::FP_TO_SINT_SAT: 6664 case ISD::FP_TO_UINT_SAT: 6665 Res = WidenVecOp_FP_TO_XINT_SAT(N); 6666 break; 6667 6668 case ISD::EXPERIMENTAL_VP_SPLAT: 6669 Res = WidenVecOp_VP_SPLAT(N, OpNo); 6670 break; 6671 6672 case ISD::VECREDUCE_FADD: 6673 case ISD::VECREDUCE_FMUL: 6674 case ISD::VECREDUCE_ADD: 6675 case ISD::VECREDUCE_MUL: 6676 case ISD::VECREDUCE_AND: 6677 case ISD::VECREDUCE_OR: 6678 case ISD::VECREDUCE_XOR: 6679 case ISD::VECREDUCE_SMAX: 6680 case ISD::VECREDUCE_SMIN: 6681 case ISD::VECREDUCE_UMAX: 6682 case ISD::VECREDUCE_UMIN: 6683 case ISD::VECREDUCE_FMAX: 6684 case ISD::VECREDUCE_FMIN: 6685 case ISD::VECREDUCE_FMAXIMUM: 6686 case ISD::VECREDUCE_FMINIMUM: 6687 Res = WidenVecOp_VECREDUCE(N); 6688 break; 6689 case ISD::VECREDUCE_SEQ_FADD: 6690 case ISD::VECREDUCE_SEQ_FMUL: 6691 Res = WidenVecOp_VECREDUCE_SEQ(N); 6692 break; 6693 case ISD::VP_REDUCE_FADD: 6694 case ISD::VP_REDUCE_SEQ_FADD: 6695 case 
ISD::VP_REDUCE_FMUL: 6696 case ISD::VP_REDUCE_SEQ_FMUL: 6697 case ISD::VP_REDUCE_ADD: 6698 case ISD::VP_REDUCE_MUL: 6699 case ISD::VP_REDUCE_AND: 6700 case ISD::VP_REDUCE_OR: 6701 case ISD::VP_REDUCE_XOR: 6702 case ISD::VP_REDUCE_SMAX: 6703 case ISD::VP_REDUCE_SMIN: 6704 case ISD::VP_REDUCE_UMAX: 6705 case ISD::VP_REDUCE_UMIN: 6706 case ISD::VP_REDUCE_FMAX: 6707 case ISD::VP_REDUCE_FMIN: 6708 case ISD::VP_REDUCE_FMAXIMUM: 6709 case ISD::VP_REDUCE_FMINIMUM: 6710 Res = WidenVecOp_VP_REDUCE(N); 6711 break; 6712 case ISD::VP_CTTZ_ELTS: 6713 case ISD::VP_CTTZ_ELTS_ZERO_UNDEF: 6714 Res = WidenVecOp_VP_CttzElements(N); 6715 break; 6716 } 6717 6718 // If Res is null, the sub-method took care of registering the result. 6719 if (!Res.getNode()) return false; 6720 6721 // If the result is N, the sub-method updated N in place. Tell the legalizer 6722 // core about this. 6723 if (Res.getNode() == N) 6724 return true; 6725 6726 6727 if (N->isStrictFPOpcode()) 6728 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 && 6729 "Invalid operand expansion"); 6730 else 6731 assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 && 6732 "Invalid operand expansion"); 6733 6734 ReplaceValueWith(SDValue(N, 0), Res); 6735 return false; 6736 } 6737 6738 SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { 6739 SDLoc DL(N); 6740 EVT VT = N->getValueType(0); 6741 6742 SDValue InOp = N->getOperand(0); 6743 assert(getTypeAction(InOp.getValueType()) == 6744 TargetLowering::TypeWidenVector && 6745 "Unexpected type action"); 6746 InOp = GetWidenedVector(InOp); 6747 assert(VT.getVectorNumElements() < 6748 InOp.getValueType().getVectorNumElements() && 6749 "Input wasn't widened!"); 6750 6751 // We may need to further widen the operand until it has the same total 6752 // vector size as the result. 6753 EVT InVT = InOp.getValueType(); 6754 if (InVT.getSizeInBits() != VT.getSizeInBits()) { 6755 EVT InEltVT = InVT.getVectorElementType(); 6756 for (EVT FixedVT : MVT::vector_valuetypes()) { 6757 EVT FixedEltVT = FixedVT.getVectorElementType(); 6758 if (TLI.isTypeLegal(FixedVT) && 6759 FixedVT.getSizeInBits() == VT.getSizeInBits() && 6760 FixedEltVT == InEltVT) { 6761 assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && 6762 "Not enough elements in the fixed type for the operand!"); 6763 assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && 6764 "We can't have the same type as we started with!"); 6765 if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) 6766 InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, 6767 DAG.getUNDEF(FixedVT), InOp, 6768 DAG.getVectorIdxConstant(0, DL)); 6769 else 6770 InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, 6771 DAG.getVectorIdxConstant(0, DL)); 6772 break; 6773 } 6774 } 6775 InVT = InOp.getValueType(); 6776 if (InVT.getSizeInBits() != VT.getSizeInBits()) 6777 // We couldn't find a legal vector type that was a widening of the input 6778 // and could be extended in-register to the result type, so we have to 6779 // scalarize. 6780 return WidenVecOp_Convert(N); 6781 } 6782 6783 // Use special DAG nodes to represent the operation of extending the 6784 // low lanes. 
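  // For example, (v4i32 sign_extend (v4i16 X)) where X has been widened to
  // v8i16 becomes (v4i32 sign_extend_vector_inreg (v8i16 X)), which extends
  // only the low 4 lanes.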
6785 switch (N->getOpcode()) { 6786 default: 6787 llvm_unreachable("Extend legalization on extend operation!"); 6788 case ISD::ANY_EXTEND: 6789 return DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, InOp); 6790 case ISD::SIGN_EXTEND: 6791 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, InOp); 6792 case ISD::ZERO_EXTEND: 6793 return DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, InOp); 6794 } 6795 } 6796 6797 SDValue DAGTypeLegalizer::WidenVecOp_CMP(SDNode *N) { 6798 SDLoc dl(N); 6799 6800 EVT OpVT = N->getOperand(0).getValueType(); 6801 EVT ResVT = N->getValueType(0); 6802 SDValue LHS = GetWidenedVector(N->getOperand(0)); 6803 SDValue RHS = GetWidenedVector(N->getOperand(1)); 6804 6805 // 1. EXTRACT_SUBVECTOR 6806 // 2. SIGN_EXTEND/ZERO_EXTEND 6807 // 3. CMP 6808 LHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, LHS, 6809 DAG.getVectorIdxConstant(0, dl)); 6810 RHS = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, RHS, 6811 DAG.getVectorIdxConstant(0, dl)); 6812 6813 // At this point the result type is guaranteed to be valid, so we can use it 6814 // as the operand type by extending it appropriately 6815 ISD::NodeType ExtendOpcode = 6816 N->getOpcode() == ISD::SCMP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 6817 LHS = DAG.getNode(ExtendOpcode, dl, ResVT, LHS); 6818 RHS = DAG.getNode(ExtendOpcode, dl, ResVT, RHS); 6819 6820 return DAG.getNode(N->getOpcode(), dl, ResVT, LHS, RHS); 6821 } 6822 6823 SDValue DAGTypeLegalizer::WidenVecOp_UnrollVectorOp(SDNode *N) { 6824 // The result (and first input) is legal, but the second input is illegal. 6825 // We can't do much to fix that, so just unroll and let the extracts off of 6826 // the second input be widened as needed later. 6827 return DAG.UnrollVectorOp(N); 6828 } 6829 6830 SDValue DAGTypeLegalizer::WidenVecOp_IS_FPCLASS(SDNode *N) { 6831 SDLoc DL(N); 6832 EVT ResultVT = N->getValueType(0); 6833 SDValue Test = N->getOperand(1); 6834 SDValue WideArg = GetWidenedVector(N->getOperand(0)); 6835 6836 // Process this node similarly to SETCC. 6837 EVT WideResultVT = getSetCCResultType(WideArg.getValueType()); 6838 if (ResultVT.getScalarType() == MVT::i1) 6839 WideResultVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 6840 WideResultVT.getVectorNumElements()); 6841 6842 SDValue WideNode = DAG.getNode(ISD::IS_FPCLASS, DL, WideResultVT, 6843 {WideArg, Test}, N->getFlags()); 6844 6845 // Extract the needed results from the result vector. 6846 EVT ResVT = 6847 EVT::getVectorVT(*DAG.getContext(), WideResultVT.getVectorElementType(), 6848 ResultVT.getVectorNumElements()); 6849 SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ResVT, WideNode, 6850 DAG.getVectorIdxConstant(0, DL)); 6851 6852 EVT OpVT = N->getOperand(0).getValueType(); 6853 ISD::NodeType ExtendCode = 6854 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); 6855 return DAG.getNode(ExtendCode, DL, ResultVT, CC); 6856 } 6857 6858 SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { 6859 // Since the result is legal and the input is illegal. 6860 EVT VT = N->getValueType(0); 6861 EVT EltVT = VT.getVectorElementType(); 6862 SDLoc dl(N); 6863 SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0); 6864 assert(getTypeAction(InOp.getValueType()) == 6865 TargetLowering::TypeWidenVector && 6866 "Unexpected type action"); 6867 InOp = GetWidenedVector(InOp); 6868 EVT InVT = InOp.getValueType(); 6869 unsigned Opcode = N->getOpcode(); 6870 6871 // See if a widened result type would be legal, if so widen the node. 6872 // FIXME: This isn't safe for StrictFP. 
Other optimization here is needed. 6873 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, 6874 InVT.getVectorElementCount()); 6875 if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) { 6876 SDValue Res; 6877 if (N->isStrictFPOpcode()) { 6878 if (Opcode == ISD::STRICT_FP_ROUND) 6879 Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, 6880 { N->getOperand(0), InOp, N->getOperand(2) }); 6881 else 6882 Res = DAG.getNode(Opcode, dl, { WideVT, MVT::Other }, 6883 { N->getOperand(0), InOp }); 6884 // Legalize the chain result - switch anything that used the old chain to 6885 // use the new one. 6886 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 6887 } else { 6888 if (Opcode == ISD::FP_ROUND) 6889 Res = DAG.getNode(Opcode, dl, WideVT, InOp, N->getOperand(1)); 6890 else 6891 Res = DAG.getNode(Opcode, dl, WideVT, InOp); 6892 } 6893 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, 6894 DAG.getVectorIdxConstant(0, dl)); 6895 } 6896 6897 EVT InEltVT = InVT.getVectorElementType(); 6898 6899 // Unroll the convert into some scalar code and create a nasty build vector. 6900 unsigned NumElts = VT.getVectorNumElements(); 6901 SmallVector<SDValue, 16> Ops(NumElts); 6902 if (N->isStrictFPOpcode()) { 6903 SmallVector<SDValue, 4> NewOps(N->ops()); 6904 SmallVector<SDValue, 32> OpChains; 6905 for (unsigned i=0; i < NumElts; ++i) { 6906 NewOps[1] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, 6907 DAG.getVectorIdxConstant(i, dl)); 6908 Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps); 6909 OpChains.push_back(Ops[i].getValue(1)); 6910 } 6911 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains); 6912 ReplaceValueWith(SDValue(N, 1), NewChain); 6913 } else { 6914 for (unsigned i = 0; i < NumElts; ++i) 6915 Ops[i] = DAG.getNode(Opcode, dl, EltVT, 6916 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, 6917 InOp, DAG.getVectorIdxConstant(i, dl))); 6918 } 6919 6920 return DAG.getBuildVector(VT, dl, Ops); 6921 } 6922 6923 SDValue DAGTypeLegalizer::WidenVecOp_FP_TO_XINT_SAT(SDNode *N) { 6924 EVT DstVT = N->getValueType(0); 6925 SDValue Src = GetWidenedVector(N->getOperand(0)); 6926 EVT SrcVT = Src.getValueType(); 6927 ElementCount WideNumElts = SrcVT.getVectorElementCount(); 6928 SDLoc dl(N); 6929 6930 // See if a widened result type would be legal, if so widen the node. 6931 EVT WideDstVT = EVT::getVectorVT(*DAG.getContext(), 6932 DstVT.getVectorElementType(), WideNumElts); 6933 if (TLI.isTypeLegal(WideDstVT)) { 6934 SDValue Res = 6935 DAG.getNode(N->getOpcode(), dl, WideDstVT, Src, N->getOperand(1)); 6936 return DAG.getNode( 6937 ISD::EXTRACT_SUBVECTOR, dl, DstVT, Res, 6938 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))); 6939 } 6940 6941 // Give up and unroll. 6942 return DAG.UnrollVectorOp(N); 6943 } 6944 6945 SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { 6946 EVT VT = N->getValueType(0); 6947 SDValue InOp = GetWidenedVector(N->getOperand(0)); 6948 EVT InWidenVT = InOp.getValueType(); 6949 SDLoc dl(N); 6950 6951 // Check if we can convert between two legal vector types and extract. 6952 TypeSize InWidenSize = InWidenVT.getSizeInBits(); 6953 TypeSize Size = VT.getSizeInBits(); 6954 // x86mmx is not an acceptable vector element type, so don't try. 
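  // For example, (i32 bitcast (v2i16 X)) with X widened to v4i16 can be
  // rewritten as extracting element 0 of (v2i32 bitcast (v4i16 X)), provided
  // v2i32 is legal.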
6955 if (!VT.isVector() && VT != MVT::x86mmx && 6956 InWidenSize.hasKnownScalarFactor(Size)) { 6957 unsigned NewNumElts = InWidenSize.getKnownScalarFactor(Size); 6958 EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); 6959 if (TLI.isTypeLegal(NewVT)) { 6960 SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); 6961 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, 6962 DAG.getVectorIdxConstant(0, dl)); 6963 } 6964 } 6965 6966 // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened 6967 // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not, 6968 // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids 6969 // having to copy via memory. 6970 if (VT.isVector()) { 6971 EVT EltVT = VT.getVectorElementType(); 6972 unsigned EltSize = EltVT.getFixedSizeInBits(); 6973 if (InWidenSize.isKnownMultipleOf(EltSize)) { 6974 ElementCount NewNumElts = 6975 (InWidenVT.getVectorElementCount() * InWidenVT.getScalarSizeInBits()) 6976 .divideCoefficientBy(EltSize); 6977 EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts); 6978 if (TLI.isTypeLegal(NewVT)) { 6979 SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); 6980 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp, 6981 DAG.getVectorIdxConstant(0, dl)); 6982 } 6983 } 6984 } 6985 6986 return CreateStackStoreLoad(InOp, VT); 6987 } 6988 6989 // Vectors with sizes that are not powers of 2 need to be widened to the 6990 // next largest power of 2. For example, we may get a vector of 3 32-bit 6991 // integers or of 6 16-bit integers, both of which have to be widened to a 6992 // 128-bit vector. 6993 SDValue DAGTypeLegalizer::WidenVecOp_FAKE_USE(SDNode *N) { 6994 SDValue WidenedOp = GetWidenedVector(N->getOperand(1)); 6995 return DAG.getNode(ISD::FAKE_USE, SDLoc(), MVT::Other, N->getOperand(0), 6996 WidenedOp); 6997 } 6998 6999 SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { 7000 EVT VT = N->getValueType(0); 7001 EVT EltVT = VT.getVectorElementType(); 7002 EVT InVT = N->getOperand(0).getValueType(); 7003 SDLoc dl(N); 7004 7005 // If the widen width for this operand is the same as the width of the concat 7006 // and all but the first operand is undef, just use the widened operand. 7007 unsigned NumOperands = N->getNumOperands(); 7008 if (VT == TLI.getTypeToTransformTo(*DAG.getContext(), InVT)) { 7009 unsigned i; 7010 for (i = 1; i < NumOperands; ++i) 7011 if (!N->getOperand(i).isUndef()) 7012 break; 7013 7014 if (i == NumOperands) 7015 return GetWidenedVector(N->getOperand(0)); 7016 } 7017 7018 // Otherwise, fall back to a nasty build vector. 
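  // That is, extract every element of each (widened) operand and reassemble
  // the result with a BUILD_VECTOR.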
7019 unsigned NumElts = VT.getVectorNumElements(); 7020 SmallVector<SDValue, 16> Ops(NumElts); 7021 7022 unsigned NumInElts = InVT.getVectorNumElements(); 7023 7024 unsigned Idx = 0; 7025 for (unsigned i=0; i < NumOperands; ++i) { 7026 SDValue InOp = N->getOperand(i); 7027 assert(getTypeAction(InOp.getValueType()) == 7028 TargetLowering::TypeWidenVector && 7029 "Unexpected type action"); 7030 InOp = GetWidenedVector(InOp); 7031 for (unsigned j = 0; j < NumInElts; ++j) 7032 Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, 7033 DAG.getVectorIdxConstant(j, dl)); 7034 } 7035 return DAG.getBuildVector(VT, dl, Ops); 7036 } 7037 7038 SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) { 7039 EVT VT = N->getValueType(0); 7040 SDValue SubVec = N->getOperand(1); 7041 SDValue InVec = N->getOperand(0); 7042 7043 EVT OrigVT = SubVec.getValueType(); 7044 if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector) 7045 SubVec = GetWidenedVector(SubVec); 7046 7047 EVT SubVT = SubVec.getValueType(); 7048 7049 // Whether or not all the elements of the widened SubVec will be inserted into 7050 // valid indices of VT. 7051 bool IndicesValid = false; 7052 // If we statically know that VT can fit SubVT, the indices are valid. 7053 if (VT.knownBitsGE(SubVT)) 7054 IndicesValid = true; 7055 else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) { 7056 // Otherwise, if we're inserting a fixed vector into a scalable vector and 7057 // we know the minimum vscale we can work out if it's valid ourselves. 7058 Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute( 7059 Attribute::VScaleRange); 7060 if (Attr.isValid()) { 7061 unsigned VScaleMin = Attr.getVScaleRangeMin(); 7062 if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >= 7063 SubVT.getFixedSizeInBits()) 7064 IndicesValid = true; 7065 } 7066 } 7067 7068 SDLoc DL(N); 7069 7070 // We need to make sure that the indices are still valid, otherwise we might 7071 // widen what was previously well-defined to something undefined. 
7072 if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0) 7073 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, InVec, SubVec, 7074 N->getOperand(2)); 7075 7076 if (!IndicesValid || OrigVT.isScalableVector()) 7077 report_fatal_error( 7078 "Don't know how to widen the operands for INSERT_SUBVECTOR"); 7079 7080 // If the operands can't be widened legally, just replace the INSERT_SUBVECTOR 7081 // with a series of INSERT_VECTOR_ELT 7082 unsigned Idx = N->getConstantOperandVal(2); 7083 7084 SDValue InsertElt = InVec; 7085 EVT VectorIdxTy = TLI.getVectorIdxTy(DAG.getDataLayout()); 7086 for (unsigned I = 0, E = OrigVT.getVectorNumElements(); I != E; ++I) { 7087 SDValue ExtractElt = 7088 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT.getVectorElementType(), 7089 SubVec, DAG.getConstant(I, DL, VectorIdxTy)); 7090 InsertElt = 7091 DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, InsertElt, ExtractElt, 7092 DAG.getConstant(I + Idx, DL, VectorIdxTy)); 7093 } 7094 7095 return InsertElt; 7096 } 7097 7098 SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { 7099 SDValue InOp = GetWidenedVector(N->getOperand(0)); 7100 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), 7101 N->getValueType(0), InOp, N->getOperand(1)); 7102 } 7103 7104 SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { 7105 SDValue InOp = GetWidenedVector(N->getOperand(0)); 7106 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), 7107 N->getValueType(0), InOp, N->getOperand(1)); 7108 } 7109 7110 SDValue DAGTypeLegalizer::WidenVecOp_EXTEND_VECTOR_INREG(SDNode *N) { 7111 SDValue InOp = GetWidenedVector(N->getOperand(0)); 7112 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), InOp); 7113 } 7114 7115 SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { 7116 // We have to widen the value, but we want only to store the original 7117 // vector type. 7118 StoreSDNode *ST = cast<StoreSDNode>(N); 7119 7120 if (!ST->getMemoryVT().getScalarType().isByteSized()) 7121 return TLI.scalarizeVectorStore(ST, DAG); 7122 7123 if (ST->isTruncatingStore()) 7124 return TLI.scalarizeVectorStore(ST, DAG); 7125 7126 // Generate a vector-predicated store if it is custom/legal on the target. 7127 // To avoid possible recursion, only do this if the widened mask type is 7128 // legal. 7129 // FIXME: Not all targets may support EVL in VP_STORE. These will have been 7130 // removed from the IR by the ExpandVectorPredication pass but we're 7131 // reintroducing them here. 7132 SDValue StVal = ST->getValue(); 7133 EVT StVT = StVal.getValueType(); 7134 EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT); 7135 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 7136 WideVT.getVectorElementCount()); 7137 7138 if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) && 7139 TLI.isTypeLegal(WideMaskVT)) { 7140 // Widen the value. 
7141 SDLoc DL(N); 7142 StVal = GetWidenedVector(StVal); 7143 SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT); 7144 SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(), 7145 StVT.getVectorElementCount()); 7146 return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(), 7147 DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask, 7148 EVL, StVT, ST->getMemOperand(), 7149 ST->getAddressingMode()); 7150 } 7151 7152 SmallVector<SDValue, 16> StChain; 7153 if (GenWidenVectorStores(StChain, ST)) { 7154 if (StChain.size() == 1) 7155 return StChain[0]; 7156 7157 return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain); 7158 } 7159 7160 report_fatal_error("Unable to widen vector store"); 7161 } 7162 7163 SDValue DAGTypeLegalizer::WidenVecOp_VP_SPLAT(SDNode *N, unsigned OpNo) { 7164 assert(OpNo == 1 && "Can widen only mask operand of vp_splat"); 7165 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), 7166 N->getOperand(0), GetWidenedVector(N->getOperand(1)), 7167 N->getOperand(2)); 7168 } 7169 7170 SDValue DAGTypeLegalizer::WidenVecOp_VP_STORE(SDNode *N, unsigned OpNo) { 7171 assert((OpNo == 1 || OpNo == 3) && 7172 "Can widen only data or mask operand of vp_store"); 7173 VPStoreSDNode *ST = cast<VPStoreSDNode>(N); 7174 SDValue Mask = ST->getMask(); 7175 SDValue StVal = ST->getValue(); 7176 SDLoc dl(N); 7177 7178 if (OpNo == 1) { 7179 // Widen the value. 7180 StVal = GetWidenedVector(StVal); 7181 7182 // We only handle the case where the mask needs widening to an 7183 // identically-sized type as the vector inputs. 7184 assert(getTypeAction(Mask.getValueType()) == 7185 TargetLowering::TypeWidenVector && 7186 "Unable to widen VP store"); 7187 Mask = GetWidenedVector(Mask); 7188 } else { 7189 Mask = GetWidenedVector(Mask); 7190 7191 // We only handle the case where the stored value needs widening to an 7192 // identically-sized type as the mask. 
7193 assert(getTypeAction(StVal.getValueType()) == 7194 TargetLowering::TypeWidenVector && 7195 "Unable to widen VP store"); 7196 StVal = GetWidenedVector(StVal); 7197 } 7198 7199 assert(Mask.getValueType().getVectorElementCount() == 7200 StVal.getValueType().getVectorElementCount() && 7201 "Mask and data vectors should have the same number of elements"); 7202 return DAG.getStoreVP(ST->getChain(), dl, StVal, ST->getBasePtr(), 7203 ST->getOffset(), Mask, ST->getVectorLength(), 7204 ST->getMemoryVT(), ST->getMemOperand(), 7205 ST->getAddressingMode(), ST->isTruncatingStore(), 7206 ST->isCompressingStore()); 7207 } 7208 7209 SDValue DAGTypeLegalizer::WidenVecOp_VP_STRIDED_STORE(SDNode *N, 7210 unsigned OpNo) { 7211 assert((OpNo == 1 || OpNo == 4) && 7212 "Can widen only data or mask operand of vp_strided_store"); 7213 VPStridedStoreSDNode *SST = cast<VPStridedStoreSDNode>(N); 7214 SDValue Mask = SST->getMask(); 7215 SDValue StVal = SST->getValue(); 7216 SDLoc DL(N); 7217 7218 if (OpNo == 1) 7219 assert(getTypeAction(Mask.getValueType()) == 7220 TargetLowering::TypeWidenVector && 7221 "Unable to widen VP strided store"); 7222 else 7223 assert(getTypeAction(StVal.getValueType()) == 7224 TargetLowering::TypeWidenVector && 7225 "Unable to widen VP strided store"); 7226 7227 StVal = GetWidenedVector(StVal); 7228 Mask = GetWidenedVector(Mask); 7229 7230 assert(StVal.getValueType().getVectorElementCount() == 7231 Mask.getValueType().getVectorElementCount() && 7232 "Data and mask vectors should have the same number of elements"); 7233 7234 return DAG.getStridedStoreVP( 7235 SST->getChain(), DL, StVal, SST->getBasePtr(), SST->getOffset(), 7236 SST->getStride(), Mask, SST->getVectorLength(), SST->getMemoryVT(), 7237 SST->getMemOperand(), SST->getAddressingMode(), SST->isTruncatingStore(), 7238 SST->isCompressingStore()); 7239 } 7240 7241 SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) { 7242 assert((OpNo == 1 || OpNo == 4) && 7243 "Can widen only data or mask operand of mstore"); 7244 MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); 7245 SDValue Mask = MST->getMask(); 7246 EVT MaskVT = Mask.getValueType(); 7247 SDValue StVal = MST->getValue(); 7248 SDLoc dl(N); 7249 7250 if (OpNo == 1) { 7251 // Widen the value. 7252 StVal = GetWidenedVector(StVal); 7253 7254 // The mask should be widened as well. 7255 EVT WideVT = StVal.getValueType(); 7256 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), 7257 MaskVT.getVectorElementType(), 7258 WideVT.getVectorNumElements()); 7259 Mask = ModifyToType(Mask, WideMaskVT, true); 7260 } else { 7261 // Widen the mask. 
7262 EVT WideMaskVT = TLI.getTypeToTransformTo(*DAG.getContext(), MaskVT); 7263 Mask = ModifyToType(Mask, WideMaskVT, true); 7264 7265 EVT ValueVT = StVal.getValueType(); 7266 EVT WideVT = EVT::getVectorVT(*DAG.getContext(), 7267 ValueVT.getVectorElementType(), 7268 WideMaskVT.getVectorNumElements()); 7269 StVal = ModifyToType(StVal, WideVT); 7270 } 7271 7272 assert(Mask.getValueType().getVectorNumElements() == 7273 StVal.getValueType().getVectorNumElements() && 7274 "Mask and data vectors should have the same number of elements"); 7275 return DAG.getMaskedStore(MST->getChain(), dl, StVal, MST->getBasePtr(), 7276 MST->getOffset(), Mask, MST->getMemoryVT(), 7277 MST->getMemOperand(), MST->getAddressingMode(), 7278 false, MST->isCompressingStore()); 7279 } 7280 7281 SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) { 7282 assert(OpNo == 4 && "Can widen only the index of mgather"); 7283 auto *MG = cast<MaskedGatherSDNode>(N); 7284 SDValue DataOp = MG->getPassThru(); 7285 SDValue Mask = MG->getMask(); 7286 SDValue Scale = MG->getScale(); 7287 7288 // Just widen the index. It's allowed to have extra elements. 7289 SDValue Index = GetWidenedVector(MG->getIndex()); 7290 7291 SDLoc dl(N); 7292 SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index, 7293 Scale}; 7294 SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops, 7295 MG->getMemOperand(), MG->getIndexType(), 7296 MG->getExtensionType()); 7297 ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); 7298 ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); 7299 return SDValue(); 7300 } 7301 7302 SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) { 7303 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N); 7304 SDValue DataOp = MSC->getValue(); 7305 SDValue Mask = MSC->getMask(); 7306 SDValue Index = MSC->getIndex(); 7307 SDValue Scale = MSC->getScale(); 7308 EVT WideMemVT = MSC->getMemoryVT(); 7309 7310 if (OpNo == 1) { 7311 DataOp = GetWidenedVector(DataOp); 7312 unsigned NumElts = DataOp.getValueType().getVectorNumElements(); 7313 7314 // Widen index. 7315 EVT IndexVT = Index.getValueType(); 7316 EVT WideIndexVT = EVT::getVectorVT(*DAG.getContext(), 7317 IndexVT.getVectorElementType(), NumElts); 7318 Index = ModifyToType(Index, WideIndexVT); 7319 7320 // The mask should be widened as well. 7321 EVT MaskVT = Mask.getValueType(); 7322 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), 7323 MaskVT.getVectorElementType(), NumElts); 7324 Mask = ModifyToType(Mask, WideMaskVT, true); 7325 7326 // Widen the MemoryType 7327 WideMemVT = EVT::getVectorVT(*DAG.getContext(), 7328 MSC->getMemoryVT().getScalarType(), NumElts); 7329 } else if (OpNo == 4) { 7330 // Just widen the index. It's allowed to have extra elements. 
7331 Index = GetWidenedVector(Index); 7332 } else 7333 llvm_unreachable("Can't widen this operand of mscatter"); 7334 7335 SDValue Ops[] = {MSC->getChain(), DataOp, Mask, MSC->getBasePtr(), Index, 7336 Scale}; 7337 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), 7338 Ops, MSC->getMemOperand(), MSC->getIndexType(), 7339 MSC->isTruncatingStore()); 7340 } 7341 7342 SDValue DAGTypeLegalizer::WidenVecOp_VP_SCATTER(SDNode *N, unsigned OpNo) { 7343 VPScatterSDNode *VPSC = cast<VPScatterSDNode>(N); 7344 SDValue DataOp = VPSC->getValue(); 7345 SDValue Mask = VPSC->getMask(); 7346 SDValue Index = VPSC->getIndex(); 7347 SDValue Scale = VPSC->getScale(); 7348 EVT WideMemVT = VPSC->getMemoryVT(); 7349 7350 if (OpNo == 1) { 7351 DataOp = GetWidenedVector(DataOp); 7352 Index = GetWidenedVector(Index); 7353 const auto WideEC = DataOp.getValueType().getVectorElementCount(); 7354 Mask = GetWidenedMask(Mask, WideEC); 7355 WideMemVT = EVT::getVectorVT(*DAG.getContext(), 7356 VPSC->getMemoryVT().getScalarType(), WideEC); 7357 } else if (OpNo == 3) { 7358 // Just widen the index. It's allowed to have extra elements. 7359 Index = GetWidenedVector(Index); 7360 } else 7361 llvm_unreachable("Can't widen this operand of VP_SCATTER"); 7362 7363 SDValue Ops[] = { 7364 VPSC->getChain(), DataOp, VPSC->getBasePtr(), Index, Scale, Mask, 7365 VPSC->getVectorLength()}; 7366 return DAG.getScatterVP(DAG.getVTList(MVT::Other), WideMemVT, SDLoc(N), Ops, 7367 VPSC->getMemOperand(), VPSC->getIndexType()); 7368 } 7369 7370 SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { 7371 SDValue InOp0 = GetWidenedVector(N->getOperand(0)); 7372 SDValue InOp1 = GetWidenedVector(N->getOperand(1)); 7373 SDLoc dl(N); 7374 EVT VT = N->getValueType(0); 7375 7376 // WARNING: In this code we widen the compare instruction with garbage. 7377 // This garbage may contain denormal floats which may be slow. Is this a real 7378 // concern ? Should we zero the unused lanes if this is a float compare ? 7379 7380 // Get a new SETCC node to compare the newly widened operands. 7381 // Only some of the compared elements are legal. 7382 EVT SVT = getSetCCResultType(InOp0.getValueType()); 7383 // The result type is legal, if its vXi1, keep vXi1 for the new SETCC. 7384 if (VT.getScalarType() == MVT::i1) 7385 SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 7386 SVT.getVectorElementCount()); 7387 7388 SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), 7389 SVT, InOp0, InOp1, N->getOperand(2)); 7390 7391 // Extract the needed results from the result vector. 7392 EVT ResVT = EVT::getVectorVT(*DAG.getContext(), 7393 SVT.getVectorElementType(), 7394 VT.getVectorElementCount()); 7395 SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, 7396 DAG.getVectorIdxConstant(0, dl)); 7397 7398 EVT OpVT = N->getOperand(0).getValueType(); 7399 ISD::NodeType ExtendCode = 7400 TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); 7401 return DAG.getNode(ExtendCode, dl, VT, CC); 7402 } 7403 7404 SDValue DAGTypeLegalizer::WidenVecOp_STRICT_FSETCC(SDNode *N) { 7405 SDValue Chain = N->getOperand(0); 7406 SDValue LHS = GetWidenedVector(N->getOperand(1)); 7407 SDValue RHS = GetWidenedVector(N->getOperand(2)); 7408 SDValue CC = N->getOperand(3); 7409 SDLoc dl(N); 7410 7411 EVT VT = N->getValueType(0); 7412 EVT EltVT = VT.getVectorElementType(); 7413 EVT TmpEltVT = LHS.getValueType().getVectorElementType(); 7414 unsigned NumElts = VT.getVectorNumElements(); 7415 7416 // Unroll into a build vector. 
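  // Each scalar STRICT_FSETCC produces its own chain; the chains are merged
  // with a TokenFactor after the loop below.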
7417 SmallVector<SDValue, 8> Scalars(NumElts); 7418 SmallVector<SDValue, 8> Chains(NumElts); 7419 7420 for (unsigned i = 0; i != NumElts; ++i) { 7421 SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, 7422 DAG.getVectorIdxConstant(i, dl)); 7423 SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, 7424 DAG.getVectorIdxConstant(i, dl)); 7425 7426 Scalars[i] = DAG.getNode(N->getOpcode(), dl, {MVT::i1, MVT::Other}, 7427 {Chain, LHSElem, RHSElem, CC}); 7428 Chains[i] = Scalars[i].getValue(1); 7429 Scalars[i] = DAG.getSelect(dl, EltVT, Scalars[i], 7430 DAG.getBoolConstant(true, dl, EltVT, VT), 7431 DAG.getBoolConstant(false, dl, EltVT, VT)); 7432 } 7433 7434 SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); 7435 ReplaceValueWith(SDValue(N, 1), NewChain); 7436 7437 return DAG.getBuildVector(VT, dl, Scalars); 7438 } 7439 7440 static unsigned getExtendForIntVecReduction(unsigned Opc) { 7441 switch (Opc) { 7442 default: 7443 llvm_unreachable("Expected integer vector reduction"); 7444 case ISD::VECREDUCE_ADD: 7445 case ISD::VECREDUCE_MUL: 7446 case ISD::VECREDUCE_AND: 7447 case ISD::VECREDUCE_OR: 7448 case ISD::VECREDUCE_XOR: 7449 return ISD::ANY_EXTEND; 7450 case ISD::VECREDUCE_SMAX: 7451 case ISD::VECREDUCE_SMIN: 7452 return ISD::SIGN_EXTEND; 7453 case ISD::VECREDUCE_UMAX: 7454 case ISD::VECREDUCE_UMIN: 7455 return ISD::ZERO_EXTEND; 7456 } 7457 } 7458 7459 SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) { 7460 SDLoc dl(N); 7461 SDValue Op = GetWidenedVector(N->getOperand(0)); 7462 EVT VT = N->getValueType(0); 7463 EVT OrigVT = N->getOperand(0).getValueType(); 7464 EVT WideVT = Op.getValueType(); 7465 EVT ElemVT = OrigVT.getVectorElementType(); 7466 SDNodeFlags Flags = N->getFlags(); 7467 7468 unsigned Opc = N->getOpcode(); 7469 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc); 7470 SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags); 7471 assert(NeutralElem && "Neutral element must exist"); 7472 7473 // Pad the vector with the neutral element. 7474 unsigned OrigElts = OrigVT.getVectorMinNumElements(); 7475 unsigned WideElts = WideVT.getVectorMinNumElements(); 7476 7477 // Generate a vp.reduce_op if it is custom/legal for the target. This avoids 7478 // needing to pad the source vector, because the inactive lanes can simply be 7479 // disabled and not contribute to the result. 
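  // For example, a VECREDUCE_ADD of v3i32 widened to v4i32 becomes a
  // vp.reduce.add with an all-ones mask and EVL = 3, so the padded lane never
  // contributes to the sum.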
7480 if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc); 7481 VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) { 7482 SDValue Start = NeutralElem; 7483 if (VT.isInteger()) 7484 Start = DAG.getNode(getExtendForIntVecReduction(Opc), dl, VT, Start); 7485 assert(Start.getValueType() == VT); 7486 EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, 7487 WideVT.getVectorElementCount()); 7488 SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT); 7489 SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(), 7490 OrigVT.getVectorElementCount()); 7491 return DAG.getNode(*VPOpcode, dl, VT, {Start, Op, Mask, EVL}, Flags); 7492 } 7493 7494 if (WideVT.isScalableVector()) { 7495 unsigned GCD = std::gcd(OrigElts, WideElts); 7496 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 7497 ElementCount::getScalable(GCD)); 7498 SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem); 7499 for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD) 7500 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral, 7501 DAG.getVectorIdxConstant(Idx, dl)); 7502 return DAG.getNode(Opc, dl, VT, Op, Flags); 7503 } 7504 7505 for (unsigned Idx = OrigElts; Idx < WideElts; Idx++) 7506 Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem, 7507 DAG.getVectorIdxConstant(Idx, dl)); 7508 7509 return DAG.getNode(Opc, dl, VT, Op, Flags); 7510 } 7511 7512 SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE_SEQ(SDNode *N) { 7513 SDLoc dl(N); 7514 SDValue AccOp = N->getOperand(0); 7515 SDValue VecOp = N->getOperand(1); 7516 SDValue Op = GetWidenedVector(VecOp); 7517 7518 EVT VT = N->getValueType(0); 7519 EVT OrigVT = VecOp.getValueType(); 7520 EVT WideVT = Op.getValueType(); 7521 EVT ElemVT = OrigVT.getVectorElementType(); 7522 SDNodeFlags Flags = N->getFlags(); 7523 7524 unsigned Opc = N->getOpcode(); 7525 unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Opc); 7526 SDValue NeutralElem = DAG.getNeutralElement(BaseOpc, dl, ElemVT, Flags); 7527 7528 // Pad the vector with the neutral element. 7529 unsigned OrigElts = OrigVT.getVectorMinNumElements(); 7530 unsigned WideElts = WideVT.getVectorMinNumElements(); 7531 7532 // Generate a vp.reduce_op if it is custom/legal for the target. This avoids 7533 // needing to pad the source vector, because the inactive lanes can simply be 7534 // disabled and not contribute to the result. 
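  // As above, use the original element count as the EVL so the padded lanes
  // are ignored; here the accumulator operand supplies the start value of the
  // VP reduction.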
  if (auto VPOpcode = ISD::getVPForBaseOpcode(Opc);
      VPOpcode && TLI.isOperationLegalOrCustom(*VPOpcode, WideVT)) {
    EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                      WideVT.getVectorElementCount());
    SDValue Mask = DAG.getAllOnesConstant(dl, WideMaskVT);
    SDValue EVL = DAG.getElementCount(dl, TLI.getVPExplicitVectorLengthTy(),
                                      OrigVT.getVectorElementCount());
    return DAG.getNode(*VPOpcode, dl, VT, {AccOp, Op, Mask, EVL}, Flags);
  }

  if (WideVT.isScalableVector()) {
    unsigned GCD = std::gcd(OrigElts, WideElts);
    EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), ElemVT,
                                   ElementCount::getScalable(GCD));
    SDValue SplatNeutral = DAG.getSplatVector(SplatVT, dl, NeutralElem);
    for (unsigned Idx = OrigElts; Idx < WideElts; Idx = Idx + GCD)
      Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Op, SplatNeutral,
                       DAG.getVectorIdxConstant(Idx, dl));
    return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
  }

  for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
    Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
                     DAG.getVectorIdxConstant(Idx, dl));

  return DAG.getNode(Opc, dl, VT, AccOp, Op, Flags);
}

SDValue DAGTypeLegalizer::WidenVecOp_VP_REDUCE(SDNode *N) {
  assert(N->isVPOpcode() && "Expected VP opcode");

  SDLoc dl(N);
  SDValue Op = GetWidenedVector(N->getOperand(1));
  SDValue Mask = GetWidenedMask(N->getOperand(2),
                                Op.getValueType().getVectorElementCount());

  return DAG.getNode(N->getOpcode(), dl, N->getValueType(0),
                     {N->getOperand(0), Op, Mask, N->getOperand(3)},
                     N->getFlags());
}

SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
  // This only gets called when the left and right inputs and the result are
  // of a legal odd vector type, and the condition is an illegal i1 vector of
  // the same odd width, which needs widening.
  EVT VT = N->getValueType(0);
  assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT));

  SDValue Cond = GetWidenedVector(N->getOperand(0));
  SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N));
  SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N));
  SDLoc DL(N);

  SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
                               LeftIn, RightIn);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
                     DAG.getVectorIdxConstant(0, DL));
}

SDValue DAGTypeLegalizer::WidenVecOp_VP_CttzElements(SDNode *N) {
  SDLoc DL(N);
  SDValue Source = GetWidenedVector(N->getOperand(0));
  EVT SrcVT = Source.getValueType();
  SDValue Mask =
      GetWidenedMask(N->getOperand(1), SrcVT.getVectorElementCount());

  return DAG.getNode(N->getOpcode(), DL, N->getValueType(0),
                     {Source, Mask, N->getOperand(2)}, N->getFlags());
}

//===----------------------------------------------------------------------===//
// Vector Widening Utilities
//===----------------------------------------------------------------------===//

// Utility function to find the type to chop up a widened vector for
// load/store.
// TLI:     Target lowering used to determine legal types.
// Width:   Width left to load/store.
// WidenVT: The widened vector type to load to / store from.
// Align:   If 0, don't allow use of a wider type.
// WidenEx: If Align is not 0, the additional amount we may load/store from.
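//
// Illustrative example (which types get chosen depends on what is legal for
// the target): for a 96-bit remainder being widened to v4i32, this might
// first return a 64-bit type such as v2i32 (or i64), and on a subsequent call
// with Width = 32 return i32 for the final 32 bits.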

static std::optional<EVT> findMemType(SelectionDAG &DAG,
                                      const TargetLowering &TLI, unsigned Width,
                                      EVT WidenVT, unsigned Align = 0,
                                      unsigned WidenEx = 0) {
  EVT WidenEltVT = WidenVT.getVectorElementType();
  const bool Scalable = WidenVT.isScalableVector();
  unsigned WidenWidth = WidenVT.getSizeInBits().getKnownMinValue();
  unsigned WidenEltWidth = WidenEltVT.getSizeInBits();
  unsigned AlignInBits = Align * 8;

  EVT RetVT = WidenEltVT;
  // Don't bother looking for an integer type if the vector is scalable; skip
  // straight to vector types.
  if (!Scalable) {
    // If we have one element to load/store, return it.
    if (Width == WidenEltWidth)
      return RetVT;

    // See if there is a larger legal integer type than the element type to
    // load/store.
    for (EVT MemVT : reverse(MVT::integer_valuetypes())) {
      unsigned MemVTWidth = MemVT.getSizeInBits();
      if (MemVT.getSizeInBits() <= WidenEltWidth)
        break;
      auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
      if ((Action == TargetLowering::TypeLegal ||
           Action == TargetLowering::TypePromoteInteger) &&
          (WidenWidth % MemVTWidth) == 0 &&
          isPowerOf2_32(WidenWidth / MemVTWidth) &&
          (MemVTWidth <= Width ||
           (Align != 0 && MemVTWidth <= AlignInBits &&
            MemVTWidth <= Width + WidenEx))) {
        if (MemVTWidth == WidenWidth)
          return MemVT;
        RetVT = MemVT;
        break;
      }
    }
  }

  // See if there is a larger vector type to load/store that has the same
  // vector element type and evenly divides WidenVT.
  for (EVT MemVT : reverse(MVT::vector_valuetypes())) {
    // Skip vector MVTs which don't match the scalable property of WidenVT.
    if (Scalable != MemVT.isScalableVector())
      continue;
    unsigned MemVTWidth = MemVT.getSizeInBits().getKnownMinValue();
    auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
    if ((Action == TargetLowering::TypeLegal ||
         Action == TargetLowering::TypePromoteInteger) &&
        WidenEltVT == MemVT.getVectorElementType() &&
        (WidenWidth % MemVTWidth) == 0 &&
        isPowerOf2_32(WidenWidth / MemVTWidth) &&
        (MemVTWidth <= Width ||
         (Align != 0 && MemVTWidth <= AlignInBits &&
          MemVTWidth <= Width + WidenEx))) {
      if (RetVT.getFixedSizeInBits() < MemVTWidth || MemVT == WidenVT)
        return MemVT;
    }
  }

  // Using element-wise loads and stores for widening operations is not
  // supported for scalable vectors.
  if (Scalable)
    return std::nullopt;

  return RetVT;
}

// Builds a vector type from scalar loads.
// VecTy: Resulting vector type
// LdOps: Load operators to build a vector type
// [Start,End): the range of loads to use.
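//
// Illustrative example (assuming all-scalar loads): four i32 loads destined
// for a v4i32 result are inserted one at a time into a SCALAR_TO_VECTOR
// value; if a later load has a different width, the partial vector is first
// bitcast and the insert index rescaled to the new element size.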
static SDValue BuildVectorFromScalar(SelectionDAG &DAG, EVT VecTy,
                                     SmallVectorImpl<SDValue> &LdOps,
                                     unsigned Start, unsigned End) {
  SDLoc dl(LdOps[Start]);
  EVT LdTy = LdOps[Start].getValueType();
  unsigned Width = VecTy.getSizeInBits();
  unsigned NumElts = Width / LdTy.getSizeInBits();
  EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), LdTy, NumElts);

  unsigned Idx = 1;
  SDValue VecOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOps[Start]);

  for (unsigned i = Start + 1; i != End; ++i) {
    EVT NewLdTy = LdOps[i].getValueType();
    if (NewLdTy != LdTy) {
      NumElts = Width / NewLdTy.getSizeInBits();
      NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
      VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
      // Readjust the insert index based on the new load type.
      Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
      LdTy = NewLdTy;
    }
    VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
                        DAG.getVectorIdxConstant(Idx++, dl));
  }
  return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}

SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
                                              LoadSDNode *LD) {
  // The strategy assumes that we can efficiently load power-of-two widths.
  // The routine chops the vector into the largest vector loads with the same
  // element type, or scalar loads, and then recombines them into the widened
  // vector type.
  EVT WidenVT =
      TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
  EVT LdVT = LD->getMemoryVT();
  SDLoc dl(LD);
  assert(LdVT.isVector() && WidenVT.isVector());
  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());
  assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType());

  // Load information.
  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
  AAMDNodes AAInfo = LD->getAAInfo();

  TypeSize LdWidth = LdVT.getSizeInBits();
  TypeSize WidenWidth = WidenVT.getSizeInBits();
  TypeSize WidthDiff = WidenWidth - LdWidth;
  // Allow wider loads if they are sufficiently aligned to avoid memory faults
  // and if the original load is simple.
  unsigned LdAlign =
      (!LD->isSimple() || LdVT.isScalableVector()) ? 0 : LD->getAlign().value();

  // Find the vector type that we can load from.
  std::optional<EVT> FirstVT =
      findMemType(DAG, TLI, LdWidth.getKnownMinValue(), WidenVT, LdAlign,
                  WidthDiff.getKnownMinValue());

  if (!FirstVT)
    return SDValue();

  SmallVector<EVT, 8> MemVTs;
  TypeSize FirstVTWidth = FirstVT->getSizeInBits();

  // Unless we're able to load in one instruction we must work out how to load
  // the remainder.
  if (!TypeSize::isKnownLE(LdWidth, FirstVTWidth)) {
    std::optional<EVT> NewVT = FirstVT;
    TypeSize RemainingWidth = LdWidth;
    TypeSize NewVTWidth = FirstVTWidth;
    do {
      RemainingWidth -= NewVTWidth;
      if (TypeSize::isKnownLT(RemainingWidth, NewVTWidth)) {
        // The current type we are using is too large. Find a better size.
        NewVT = findMemType(DAG, TLI, RemainingWidth.getKnownMinValue(),
                            WidenVT, LdAlign, WidthDiff.getKnownMinValue());
        if (!NewVT)
          return SDValue();
        NewVTWidth = NewVT->getSizeInBits();
      }
      MemVTs.push_back(*NewVT);
    } while (TypeSize::isKnownGT(RemainingWidth, NewVTWidth));
  }

  SDValue LdOp = DAG.getLoad(*FirstVT, dl, Chain, BasePtr, LD->getPointerInfo(),
                             LD->getOriginalAlign(), MMOFlags, AAInfo);
  LdChain.push_back(LdOp.getValue(1));

  // Check if we can load the element with one instruction.
  if (MemVTs.empty()) {
    assert(TypeSize::isKnownLE(LdWidth, FirstVTWidth));
    if (!FirstVT->isVector()) {
      unsigned NumElts =
          WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), *FirstVT, NumElts);
      SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
      return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
    }
    if (FirstVT == WidenVT)
      return LdOp;

    // TODO: We don't currently have any tests that exercise this code path.
    assert(WidenWidth.getFixedValue() % FirstVTWidth.getFixedValue() == 0);
    unsigned NumConcat =
        WidenWidth.getFixedValue() / FirstVTWidth.getFixedValue();
    SmallVector<SDValue, 16> ConcatOps(NumConcat);
    SDValue UndefVal = DAG.getUNDEF(*FirstVT);
    ConcatOps[0] = LdOp;
    for (unsigned i = 1; i != NumConcat; ++i)
      ConcatOps[i] = UndefVal;
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, ConcatOps);
  }

  // Load the vector by using multiple loads, from the largest vector type
  // down to scalars.
  SmallVector<SDValue, 16> LdOps;
  LdOps.push_back(LdOp);

  uint64_t ScaledOffset = 0;
  MachinePointerInfo MPI = LD->getPointerInfo();

  // First increment past the first load.
  IncrementPointer(cast<LoadSDNode>(LdOp), *FirstVT, MPI, BasePtr,
                   &ScaledOffset);

  for (EVT MemVT : MemVTs) {
    Align NewAlign = ScaledOffset == 0
                         ? LD->getOriginalAlign()
                         : commonAlignment(LD->getAlign(), ScaledOffset);
    SDValue L =
        DAG.getLoad(MemVT, dl, Chain, BasePtr, MPI, NewAlign, MMOFlags, AAInfo);

    LdOps.push_back(L);
    LdChain.push_back(L.getValue(1));
    IncrementPointer(cast<LoadSDNode>(L), MemVT, MPI, BasePtr, &ScaledOffset);
  }

  // Build the vector from the load operations.
  unsigned End = LdOps.size();
  if (!LdOps[0].getValueType().isVector())
    // All the loads are scalar loads.
    return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);

  // If the loads contain vectors, build the result using CONCAT_VECTORS.
  // All of the vectors used to load are power-of-2, and the scalar loads can
  // be combined to make a power-of-2 vector.
  SmallVector<SDValue, 16> ConcatOps(End);
  int i = End - 1;
  int Idx = End;
  EVT LdTy = LdOps[i].getValueType();
  // First, combine the scalar loads to a vector.
  if (!LdTy.isVector()) {
    for (--i; i >= 0; --i) {
      LdTy = LdOps[i].getValueType();
      if (LdTy.isVector())
        break;
    }
    ConcatOps[--Idx] = BuildVectorFromScalar(DAG, LdTy, LdOps, i + 1, End);
  }

  ConcatOps[--Idx] = LdOps[i];
  for (--i; i >= 0; --i) {
    EVT NewLdTy = LdOps[i].getValueType();
    if (NewLdTy != LdTy) {
      // Create a larger vector.
      TypeSize LdTySize = LdTy.getSizeInBits();
      TypeSize NewLdTySize = NewLdTy.getSizeInBits();
      assert(NewLdTySize.isScalable() == LdTySize.isScalable() &&
             NewLdTySize.isKnownMultipleOf(LdTySize.getKnownMinValue()));
      unsigned NumOps =
          NewLdTySize.getKnownMinValue() / LdTySize.getKnownMinValue();
      SmallVector<SDValue, 16> WidenOps(NumOps);
      unsigned j = 0;
      for (; j != End - Idx; ++j)
        WidenOps[j] = ConcatOps[Idx + j];
      for (; j != NumOps; ++j)
        WidenOps[j] = DAG.getUNDEF(LdTy);

      ConcatOps[End - 1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
                                       WidenOps);
      Idx = End - 1;
      LdTy = NewLdTy;
    }
    ConcatOps[--Idx] = LdOps[i];
  }

  if (WidenWidth == LdTy.getSizeInBits() * (End - Idx))
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT,
                       ArrayRef(&ConcatOps[Idx], End - Idx));

  // We need to fill the rest with undefs to build the vector.
  unsigned NumOps =
      WidenWidth.getKnownMinValue() / LdTy.getSizeInBits().getKnownMinValue();
  SmallVector<SDValue, 16> WidenOps(NumOps);
  SDValue UndefVal = DAG.getUNDEF(LdTy);
  {
    unsigned i = 0;
    for (; i != End - Idx; ++i)
      WidenOps[i] = ConcatOps[Idx + i];
    for (; i != NumOps; ++i)
      WidenOps[i] = UndefVal;
  }
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WidenVT, WidenOps);
}

SDValue
DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain,
                                         LoadSDNode *LD,
                                         ISD::LoadExtType ExtType) {
  // For extension loads, it may not be more efficient to chop up the vector
  // and then extend it. Instead, we unroll the load and build a new vector.
  EVT WidenVT =
      TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
  EVT LdVT = LD->getMemoryVT();
  SDLoc dl(LD);
  assert(LdVT.isVector() && WidenVT.isVector());
  assert(LdVT.isScalableVector() == WidenVT.isScalableVector());

  // Load information.
  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand::Flags MMOFlags = LD->getMemOperand()->getFlags();
  AAMDNodes AAInfo = LD->getAAInfo();

  if (LdVT.isScalableVector())
    report_fatal_error("Generating widen scalable extending vector loads is "
                       "not yet supported");

  EVT EltVT = WidenVT.getVectorElementType();
  EVT LdEltVT = LdVT.getVectorElementType();
  unsigned NumElts = LdVT.getVectorNumElements();

  // Load each element and widen.
  unsigned WidenNumElts = WidenVT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(WidenNumElts);
  unsigned Increment = LdEltVT.getSizeInBits() / 8;
  Ops[0] =
      DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(),
                     LdEltVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
  LdChain.push_back(Ops[0].getValue(1));
  unsigned i = 0, Offset = Increment;
  for (i = 1; i < NumElts; ++i, Offset += Increment) {
    SDValue NewBasePtr =
        DAG.getObjectPtrOffset(dl, BasePtr, TypeSize::getFixed(Offset));
    Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
                            LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
                            LD->getOriginalAlign(), MMOFlags, AAInfo);
    LdChain.push_back(Ops[i].getValue(1));
  }

  // Fill the rest with undefs.
  SDValue UndefVal = DAG.getUNDEF(EltVT);
  for (; i != WidenNumElts; ++i)
    Ops[i] = UndefVal;

  return DAG.getBuildVector(WidenVT, dl, Ops);
}

bool DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain,
                                            StoreSDNode *ST) {
  // The strategy assumes that we can efficiently store power-of-two widths.
  // The routine chops the vector into the largest vector stores with the same
  // element type, or scalar stores.
  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
  AAMDNodes AAInfo = ST->getAAInfo();
  SDValue ValOp = GetWidenedVector(ST->getValue());
  SDLoc dl(ST);

  EVT StVT = ST->getMemoryVT();
  TypeSize StWidth = StVT.getSizeInBits();
  EVT ValVT = ValOp.getValueType();
  TypeSize ValWidth = ValVT.getSizeInBits();
  EVT ValEltVT = ValVT.getVectorElementType();
  unsigned ValEltWidth = ValEltVT.getFixedSizeInBits();
  assert(StVT.getVectorElementType() == ValEltVT);
  assert(StVT.isScalableVector() == ValVT.isScalableVector() &&
         "Mismatch between store and value types");

  int Idx = 0; // current index to store

  MachinePointerInfo MPI = ST->getPointerInfo();
  uint64_t ScaledOffset = 0;

  // A breakdown of how to widen this vector store. Each element of the vector
  // is a memory VT combined with the number of times it is to be stored,
  // e.g., v5i32 -> {{v2i32,2},{i32,1}}.
  SmallVector<std::pair<EVT, unsigned>, 4> MemVTs;

  while (StWidth.isNonZero()) {
    // Find the largest vector type we can store with.
    std::optional<EVT> NewVT =
        findMemType(DAG, TLI, StWidth.getKnownMinValue(), ValVT);
    if (!NewVT)
      return false;
    MemVTs.push_back({*NewVT, 0});
    TypeSize NewVTWidth = NewVT->getSizeInBits();

    do {
      StWidth -= NewVTWidth;
      MemVTs.back().second++;
    } while (StWidth.isNonZero() && TypeSize::isKnownGE(StWidth, NewVTWidth));
  }

  for (const auto &Pair : MemVTs) {
    EVT NewVT = Pair.first;
    unsigned Count = Pair.second;
    TypeSize NewVTWidth = NewVT.getSizeInBits();

    if (NewVT.isVector()) {
      unsigned NumVTElts = NewVT.getVectorMinNumElements();
      do {
        Align NewAlign = ScaledOffset == 0
                             ? ST->getOriginalAlign()
                             : commonAlignment(ST->getAlign(), ScaledOffset);
        SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
                                  DAG.getVectorIdxConstant(Idx, dl));
        SDValue PartStore = DAG.getStore(Chain, dl, EOp, BasePtr, MPI, NewAlign,
                                         MMOFlags, AAInfo);
        StChain.push_back(PartStore);

        Idx += NumVTElts;
        IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr,
                         &ScaledOffset);
      } while (--Count);
    } else {
      // Cast the vector to the scalar type we can store.
      unsigned NumElts = ValWidth.getFixedValue() / NewVTWidth.getFixedValue();
      EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
      SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
      // Readjust the index position based on the new vector type.
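      // Illustrative example: with 32-bit value elements stored through
      // 64-bit scalar chunks, a value-element index of 4 becomes chunk index
      // 4 * 32 / 64 = 2; the reverse scaling after the loop restores it.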
      Idx = Idx * ValEltWidth / NewVTWidth.getFixedValue();
      do {
        SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
                                  DAG.getVectorIdxConstant(Idx++, dl));
        SDValue PartStore =
            DAG.getStore(Chain, dl, EOp, BasePtr, MPI, ST->getOriginalAlign(),
                         MMOFlags, AAInfo);
        StChain.push_back(PartStore);

        IncrementPointer(cast<StoreSDNode>(PartStore), NewVT, MPI, BasePtr);
      } while (--Count);
      // Restore the index back to be relative to the original widened element
      // type.
      Idx = Idx * NewVTWidth.getFixedValue() / ValEltWidth;
    }
  }

  return true;
}

/// Modifies a vector input (widens or narrows) to a vector of NVT. The
/// input vector must have the same element type as NVT.
/// FillWithZeroes specifies that the vector should be widened with zeroes.
SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT,
                                       bool FillWithZeroes) {
  // Note that InOp might have been widened, so it might already have
  // the right width or it might need to be narrowed.
  EVT InVT = InOp.getValueType();
  assert(InVT.getVectorElementType() == NVT.getVectorElementType() &&
         "input and widen element type must match");
  assert(InVT.isScalableVector() == NVT.isScalableVector() &&
         "cannot modify scalable vectors in this way");
  SDLoc dl(InOp);

  // Check if InOp already has the right width.
  if (InVT == NVT)
    return InOp;

  ElementCount InEC = InVT.getVectorElementCount();
  ElementCount WidenEC = NVT.getVectorElementCount();
  if (WidenEC.hasKnownScalarFactor(InEC)) {
    unsigned NumConcat = WidenEC.getKnownScalarFactor(InEC);
    SmallVector<SDValue, 16> Ops(NumConcat);
    SDValue FillVal =
        FillWithZeroes ? DAG.getConstant(0, dl, InVT) : DAG.getUNDEF(InVT);
    Ops[0] = InOp;
    for (unsigned i = 1; i != NumConcat; ++i)
      Ops[i] = FillVal;

    return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, Ops);
  }

  if (InEC.hasKnownScalarFactor(WidenEC))
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp,
                       DAG.getVectorIdxConstant(0, dl));

  assert(!InVT.isScalableVector() && !NVT.isScalableVector() &&
         "Scalable vectors should have been handled already.");

  unsigned InNumElts = InEC.getFixedValue();
  unsigned WidenNumElts = WidenEC.getFixedValue();

  // Fall back to extract and build (plus a mask, if padding with zeros).
  SmallVector<SDValue, 16> Ops(WidenNumElts);
  EVT EltVT = NVT.getVectorElementType();
  unsigned MinNumElts = std::min(WidenNumElts, InNumElts);
  unsigned Idx;
  for (Idx = 0; Idx < MinNumElts; ++Idx)
    Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
                           DAG.getVectorIdxConstant(Idx, dl));

  SDValue UndefVal = DAG.getUNDEF(EltVT);
  for (; Idx < WidenNumElts; ++Idx)
    Ops[Idx] = UndefVal;

  SDValue Widened = DAG.getBuildVector(NVT, dl, Ops);
  if (!FillWithZeroes)
    return Widened;

  assert(NVT.isInteger() &&
         "We expect to never want to FillWithZeroes for non-integral types.");

  SmallVector<SDValue, 16> MaskOps;
  MaskOps.append(MinNumElts, DAG.getAllOnesConstant(dl, EltVT));
  MaskOps.append(WidenNumElts - MinNumElts, DAG.getConstant(0, dl, EltVT));

  return DAG.getNode(ISD::AND, dl, NVT, Widened,
                     DAG.getBuildVector(NVT, dl, MaskOps));
}