1 //===-- SystemZISelLowering.cpp - SystemZ DAG lowering implementation -----===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the SystemZTargetLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "SystemZISelLowering.h" 14 #include "SystemZCallingConv.h" 15 #include "SystemZConstantPoolValue.h" 16 #include "SystemZMachineFunctionInfo.h" 17 #include "SystemZTargetMachine.h" 18 #include "llvm/CodeGen/CallingConvLower.h" 19 #include "llvm/CodeGen/ISDOpcodes.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineRegisterInfo.h" 22 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 23 #include "llvm/IR/GlobalAlias.h" 24 #include "llvm/IR/IntrinsicInst.h" 25 #include "llvm/IR/Intrinsics.h" 26 #include "llvm/IR/IntrinsicsS390.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/KnownBits.h" 30 #include <cctype> 31 #include <optional> 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "systemz-lower" 36 37 // Temporarily let this be disabled by default until all known problems 38 // related to argument extensions are fixed. 39 static cl::opt<bool> EnableIntArgExtCheck( 40 "argext-abi-check", cl::init(false), 41 cl::desc("Verify that narrow int args are properly extended per the " 42 "SystemZ ABI.")); 43 44 namespace { 45 // Represents information about a comparison. 46 struct Comparison { 47 Comparison(SDValue Op0In, SDValue Op1In, SDValue ChainIn) 48 : Op0(Op0In), Op1(Op1In), Chain(ChainIn), 49 Opcode(0), ICmpType(0), CCValid(0), CCMask(0) {} 50 51 // The operands to the comparison. 52 SDValue Op0, Op1; 53 54 // Chain if this is a strict floating-point comparison. 55 SDValue Chain; 56 57 // The opcode that should be used to compare Op0 and Op1. 58 unsigned Opcode; 59 60 // A SystemZICMP value. Only used for integer comparisons. 61 unsigned ICmpType; 62 63 // The mask of CC values that Opcode can produce. 64 unsigned CCValid; 65 66 // The mask of CC values for which the original condition is true. 67 unsigned CCMask; 68 }; 69 } // end anonymous namespace 70 71 // Classify VT as either 32 or 64 bit. 72 static bool is32Bit(EVT VT) { 73 switch (VT.getSimpleVT().SimpleTy) { 74 case MVT::i32: 75 return true; 76 case MVT::i64: 77 return false; 78 default: 79 llvm_unreachable("Unsupported type"); 80 } 81 } 82 83 // Return a version of MachineOperand that can be safely used before the 84 // final use. 85 static MachineOperand earlyUseOperand(MachineOperand Op) { 86 if (Op.isReg()) 87 Op.setIsKill(false); 88 return Op; 89 } 90 91 SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM, 92 const SystemZSubtarget &STI) 93 : TargetLowering(TM), Subtarget(STI) { 94 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); 95 96 auto *Regs = STI.getSpecialRegisters(); 97 98 // Set up the register classes. 
99 if (Subtarget.hasHighWord()) 100 addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); 101 else 102 addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); 103 addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); 104 if (!useSoftFloat()) { 105 if (Subtarget.hasVector()) { 106 addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); 107 addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); 108 } else { 109 addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); 110 addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); 111 } 112 if (Subtarget.hasVectorEnhancements1()) 113 addRegisterClass(MVT::f128, &SystemZ::VR128BitRegClass); 114 else 115 addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); 116 117 if (Subtarget.hasVector()) { 118 addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); 119 addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); 120 addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); 121 addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); 122 addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); 123 addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); 124 } 125 126 if (Subtarget.hasVector()) 127 addRegisterClass(MVT::i128, &SystemZ::VR128BitRegClass); 128 } 129 130 // Compute derived properties from the register classes 131 computeRegisterProperties(Subtarget.getRegisterInfo()); 132 133 // Set up special registers. 134 setStackPointerRegisterToSaveRestore(Regs->getStackPointerRegister()); 135 136 // TODO: It may be better to default to latency-oriented scheduling, however 137 // LLVM's current latency-oriented scheduler can't handle physreg definitions 138 // such as SystemZ has with CC, so set this to the register-pressure 139 // scheduler, because it can. 140 setSchedulingPreference(Sched::RegPressure); 141 142 setBooleanContents(ZeroOrOneBooleanContent); 143 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); 144 145 setMaxAtomicSizeInBitsSupported(128); 146 147 // Instructions are strings of 2-byte aligned 2-byte values. 148 setMinFunctionAlignment(Align(2)); 149 // For performance reasons we prefer 16-byte alignment. 150 setPrefFunctionAlignment(Align(16)); 151 152 // Handle operations that are handled in a similar way for all types. 153 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 154 I <= MVT::LAST_FP_VALUETYPE; 155 ++I) { 156 MVT VT = MVT::SimpleValueType(I); 157 if (isTypeLegal(VT)) { 158 // Lower SET_CC into an IPM-based sequence. 159 setOperationAction(ISD::SETCC, VT, Custom); 160 setOperationAction(ISD::STRICT_FSETCC, VT, Custom); 161 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); 162 163 // Expand SELECT(C, A, B) into SELECT_CC(X, 0, A, B, NE). 164 setOperationAction(ISD::SELECT, VT, Expand); 165 166 // Lower SELECT_CC and BR_CC into separate comparisons and branches. 167 setOperationAction(ISD::SELECT_CC, VT, Custom); 168 setOperationAction(ISD::BR_CC, VT, Custom); 169 } 170 } 171 172 // Expand jump table branches as address arithmetic followed by an 173 // indirect jump. 174 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 175 176 // Expand BRCOND into a BR_CC (see above). 177 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 178 179 // Handle integer types except i128. 180 for (unsigned I = MVT::FIRST_INTEGER_VALUETYPE; 181 I <= MVT::LAST_INTEGER_VALUETYPE; 182 ++I) { 183 MVT VT = MVT::SimpleValueType(I); 184 if (isTypeLegal(VT) && VT != MVT::i128) { 185 setOperationAction(ISD::ABS, VT, Legal); 186 187 // Expand individual DIV and REMs into DIVREMs. 
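// (Illustrative note, not from the original source: with SDIV/SREM marked
// Expand and SDIVREM marked Custom, an i64 sdiv and srem of the same
// operands are expected to end up as a single SDIVREM node, matching the
// divide instructions that produce quotient and remainder together in a
// register pair.)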
188 setOperationAction(ISD::SDIV, VT, Expand); 189 setOperationAction(ISD::UDIV, VT, Expand); 190 setOperationAction(ISD::SREM, VT, Expand); 191 setOperationAction(ISD::UREM, VT, Expand); 192 setOperationAction(ISD::SDIVREM, VT, Custom); 193 setOperationAction(ISD::UDIVREM, VT, Custom); 194 195 // Support addition/subtraction with overflow. 196 setOperationAction(ISD::SADDO, VT, Custom); 197 setOperationAction(ISD::SSUBO, VT, Custom); 198 199 // Support addition/subtraction with carry. 200 setOperationAction(ISD::UADDO, VT, Custom); 201 setOperationAction(ISD::USUBO, VT, Custom); 202 203 // Support carry in as value rather than glue. 204 setOperationAction(ISD::UADDO_CARRY, VT, Custom); 205 setOperationAction(ISD::USUBO_CARRY, VT, Custom); 206 207 // Lower ATOMIC_LOAD_SUB into ATOMIC_LOAD_ADD if LAA and LAAG are 208 // available, or if the operand is constant. 209 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); 210 211 // Use POPCNT on z196 and above. 212 if (Subtarget.hasPopulationCount()) 213 setOperationAction(ISD::CTPOP, VT, Custom); 214 else 215 setOperationAction(ISD::CTPOP, VT, Expand); 216 217 // No special instructions for these. 218 setOperationAction(ISD::CTTZ, VT, Expand); 219 setOperationAction(ISD::ROTR, VT, Expand); 220 221 // Use *MUL_LOHI where possible instead of MULH*. 222 setOperationAction(ISD::MULHS, VT, Expand); 223 setOperationAction(ISD::MULHU, VT, Expand); 224 setOperationAction(ISD::SMUL_LOHI, VT, Custom); 225 setOperationAction(ISD::UMUL_LOHI, VT, Custom); 226 227 // Only z196 and above have native support for conversions to unsigned. 228 // On z10, promoting to i64 doesn't generate an inexact condition for 229 // values that are outside the i32 range but in the i64 range, so use 230 // the default expansion. 231 if (!Subtarget.hasFPExtension()) 232 setOperationAction(ISD::FP_TO_UINT, VT, Expand); 233 234 // Mirror those settings for STRICT_FP_TO_[SU]INT. Note that these all 235 // default to Expand, so need to be modified to Legal where appropriate. 236 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Legal); 237 if (Subtarget.hasFPExtension()) 238 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Legal); 239 240 // And similarly for STRICT_[SU]INT_TO_FP. 241 setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Legal); 242 if (Subtarget.hasFPExtension()) 243 setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Legal); 244 } 245 } 246 247 // Handle i128 if legal. 248 if (isTypeLegal(MVT::i128)) { 249 // No special instructions for these. 250 setOperationAction(ISD::SDIVREM, MVT::i128, Expand); 251 setOperationAction(ISD::UDIVREM, MVT::i128, Expand); 252 setOperationAction(ISD::SMUL_LOHI, MVT::i128, Expand); 253 setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand); 254 setOperationAction(ISD::ROTR, MVT::i128, Expand); 255 setOperationAction(ISD::ROTL, MVT::i128, Expand); 256 257 // No special instructions for these before arch15. 
258 if (!Subtarget.hasVectorEnhancements3()) { 259 setOperationAction(ISD::MUL, MVT::i128, Expand); 260 setOperationAction(ISD::MULHS, MVT::i128, Expand); 261 setOperationAction(ISD::MULHU, MVT::i128, Expand); 262 setOperationAction(ISD::SDIV, MVT::i128, Expand); 263 setOperationAction(ISD::UDIV, MVT::i128, Expand); 264 setOperationAction(ISD::SREM, MVT::i128, Expand); 265 setOperationAction(ISD::UREM, MVT::i128, Expand); 266 setOperationAction(ISD::CTLZ, MVT::i128, Expand); 267 setOperationAction(ISD::CTTZ, MVT::i128, Expand); 268 } else { 269 // Even if we do have a legal 128-bit multiply, we do not 270 // want 64-bit multiply-high operations to use it. 271 setOperationAction(ISD::MULHS, MVT::i64, Custom); 272 setOperationAction(ISD::MULHU, MVT::i64, Custom); 273 } 274 275 // Support addition/subtraction with carry. 276 setOperationAction(ISD::UADDO, MVT::i128, Custom); 277 setOperationAction(ISD::USUBO, MVT::i128, Custom); 278 setOperationAction(ISD::UADDO_CARRY, MVT::i128, Custom); 279 setOperationAction(ISD::USUBO_CARRY, MVT::i128, Custom); 280 281 // Use VPOPCT and add up partial results. 282 setOperationAction(ISD::CTPOP, MVT::i128, Custom); 283 284 // Additional instructions available with arch15. 285 if (Subtarget.hasVectorEnhancements3()) { 286 setOperationAction(ISD::ABS, MVT::i128, Legal); 287 } 288 289 // We have to use libcalls for these. 290 setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall); 291 setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall); 292 setOperationAction(ISD::UINT_TO_FP, MVT::i128, LibCall); 293 setOperationAction(ISD::SINT_TO_FP, MVT::i128, LibCall); 294 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, LibCall); 295 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, LibCall); 296 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, LibCall); 297 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, LibCall); 298 } 299 300 // Type legalization will convert 8- and 16-bit atomic operations into 301 // forms that operate on i32s (but still keeping the original memory VT). 302 // Lower them into full i32 operations. 303 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Custom); 304 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom); 305 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom); 306 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom); 307 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Custom); 308 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Custom); 309 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Custom); 310 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Custom); 311 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Custom); 312 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Custom); 313 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Custom); 314 315 // Whether or not i128 is a legal type, we need to custom lower 316 // the atomic operations in order to exploit SystemZ instructions. 317 setOperationAction(ISD::ATOMIC_LOAD, MVT::i128, Custom); 318 setOperationAction(ISD::ATOMIC_STORE, MVT::i128, Custom); 319 setOperationAction(ISD::ATOMIC_LOAD, MVT::f128, Custom); 320 setOperationAction(ISD::ATOMIC_STORE, MVT::f128, Custom); 321 322 // Mark sign/zero extending atomic loads as legal, which will make 323 // DAGCombiner fold extensions into atomic loads if possible. 
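// (Illustrative note, not from the original source: a (sext i64
// (atomic_load i8)) sequence can then be selected as a single
// sign-extending load instead of a load followed by a separate extension.)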
324 setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, 325 {MVT::i8, MVT::i16, MVT::i32}, Legal); 326 setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i32, 327 {MVT::i8, MVT::i16}, Legal); 328 setAtomicLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i16, 329 MVT::i8, Legal); 330 331 // We can use the CC result of compare-and-swap to implement 332 // the "success" result of ATOMIC_CMP_SWAP_WITH_SUCCESS. 333 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Custom); 334 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Custom); 335 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); 336 337 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); 338 339 // Traps are legal, as we will convert them to "j .+2". 340 setOperationAction(ISD::TRAP, MVT::Other, Legal); 341 342 // z10 has instructions for signed but not unsigned FP conversion. 343 // Handle unsigned 32-bit types as signed 64-bit types. 344 if (!Subtarget.hasFPExtension()) { 345 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); 346 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 347 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Promote); 348 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Expand); 349 } 350 351 // We have native support for a 64-bit CTLZ, via FLOGR. 352 setOperationAction(ISD::CTLZ, MVT::i32, Promote); 353 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote); 354 setOperationAction(ISD::CTLZ, MVT::i64, Legal); 355 356 // On arch15 we have native support for a 64-bit CTTZ. 357 if (Subtarget.hasMiscellaneousExtensions4()) { 358 setOperationAction(ISD::CTTZ, MVT::i32, Promote); 359 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Promote); 360 setOperationAction(ISD::CTTZ, MVT::i64, Legal); 361 } 362 363 // On z15 we have native support for a 64-bit CTPOP. 364 if (Subtarget.hasMiscellaneousExtensions3()) { 365 setOperationAction(ISD::CTPOP, MVT::i32, Promote); 366 setOperationAction(ISD::CTPOP, MVT::i64, Legal); 367 } 368 369 // Give LowerOperation the chance to replace 64-bit ORs with subregs. 370 setOperationAction(ISD::OR, MVT::i64, Custom); 371 372 // Expand 128 bit shifts without using a libcall. 373 setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); 374 setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); 375 setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); 376 377 // Also expand 256 bit shifts if i128 is a legal type. 378 if (isTypeLegal(MVT::i128)) { 379 setOperationAction(ISD::SRL_PARTS, MVT::i128, Expand); 380 setOperationAction(ISD::SHL_PARTS, MVT::i128, Expand); 381 setOperationAction(ISD::SRA_PARTS, MVT::i128, Expand); 382 } 383 384 // Handle bitcast from fp128 to i128. 385 if (!isTypeLegal(MVT::i128)) 386 setOperationAction(ISD::BITCAST, MVT::i128, Custom); 387 388 // We have native instructions for i8, i16 and i32 extensions, but not i1. 389 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 390 for (MVT VT : MVT::integer_valuetypes()) { 391 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); 392 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote); 393 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote); 394 } 395 396 // Handle the various types of symbolic address. 
397 setOperationAction(ISD::ConstantPool, PtrVT, Custom); 398 setOperationAction(ISD::GlobalAddress, PtrVT, Custom); 399 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom); 400 setOperationAction(ISD::BlockAddress, PtrVT, Custom); 401 setOperationAction(ISD::JumpTable, PtrVT, Custom); 402 403 // We need to handle dynamic allocations specially because of the 404 // 160-byte area at the bottom of the stack. 405 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); 406 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, PtrVT, Custom); 407 408 setOperationAction(ISD::STACKSAVE, MVT::Other, Custom); 409 setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom); 410 411 // Handle prefetches with PFD or PFDRL. 412 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 413 414 // Handle readcyclecounter with STCKF. 415 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); 416 417 for (MVT VT : MVT::fixedlen_vector_valuetypes()) { 418 // Assume by default that all vector operations need to be expanded. 419 for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) 420 if (getOperationAction(Opcode, VT) == Legal) 421 setOperationAction(Opcode, VT, Expand); 422 423 // Likewise all truncating stores and extending loads. 424 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { 425 setTruncStoreAction(VT, InnerVT, Expand); 426 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); 427 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); 428 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); 429 } 430 431 if (isTypeLegal(VT)) { 432 // These operations are legal for anything that can be stored in a 433 // vector register, even if there is no native support for the format 434 // as such. In particular, we can do these for v4f32 even though there 435 // are no specific instructions for that format. 436 setOperationAction(ISD::LOAD, VT, Legal); 437 setOperationAction(ISD::STORE, VT, Legal); 438 setOperationAction(ISD::VSELECT, VT, Legal); 439 setOperationAction(ISD::BITCAST, VT, Legal); 440 setOperationAction(ISD::UNDEF, VT, Legal); 441 442 // Likewise, except that we need to replace the nodes with something 443 // more specific. 444 setOperationAction(ISD::BUILD_VECTOR, VT, Custom); 445 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); 446 } 447 } 448 449 // Handle integer vector types. 450 for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) { 451 if (isTypeLegal(VT)) { 452 // These operations have direct equivalents. 
453 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); 454 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); 455 setOperationAction(ISD::ADD, VT, Legal); 456 setOperationAction(ISD::SUB, VT, Legal); 457 if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3()) 458 setOperationAction(ISD::MUL, VT, Legal); 459 if (Subtarget.hasVectorEnhancements3() && 460 VT != MVT::v16i8 && VT != MVT::v8i16) { 461 setOperationAction(ISD::SDIV, VT, Legal); 462 setOperationAction(ISD::UDIV, VT, Legal); 463 setOperationAction(ISD::SREM, VT, Legal); 464 setOperationAction(ISD::UREM, VT, Legal); 465 } 466 setOperationAction(ISD::ABS, VT, Legal); 467 setOperationAction(ISD::AND, VT, Legal); 468 setOperationAction(ISD::OR, VT, Legal); 469 setOperationAction(ISD::XOR, VT, Legal); 470 if (Subtarget.hasVectorEnhancements1()) 471 setOperationAction(ISD::CTPOP, VT, Legal); 472 else 473 setOperationAction(ISD::CTPOP, VT, Custom); 474 setOperationAction(ISD::CTTZ, VT, Legal); 475 setOperationAction(ISD::CTLZ, VT, Legal); 476 477 // Convert a GPR scalar to a vector by inserting it into element 0. 478 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); 479 480 // Use a series of unpacks for extensions. 481 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); 482 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); 483 484 // Detect shifts/rotates by a scalar amount and convert them into 485 // V*_BY_SCALAR. 486 setOperationAction(ISD::SHL, VT, Custom); 487 setOperationAction(ISD::SRA, VT, Custom); 488 setOperationAction(ISD::SRL, VT, Custom); 489 setOperationAction(ISD::ROTL, VT, Custom); 490 491 // Add ISD::VECREDUCE_ADD as custom in order to implement 492 // it with VZERO+VSUM 493 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom); 494 495 // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands 496 // and inverting the result as necessary. 497 setOperationAction(ISD::SETCC, VT, Custom); 498 } 499 } 500 501 if (Subtarget.hasVector()) { 502 // There should be no need to check for float types other than v2f64 503 // since <2 x f32> isn't a legal type. 
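// (Illustrative note, not from the original source: fp_to_sint on v2f64,
// for instance, can be handled by a single doubleword vector conversion
// instruction, which is why the conversions below are simply marked Legal.)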
504 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); 505 setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Legal); 506 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); 507 setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Legal); 508 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); 509 setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Legal); 510 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); 511 setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal); 512 513 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i64, Legal); 514 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f64, Legal); 515 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i64, Legal); 516 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f64, Legal); 517 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i64, Legal); 518 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f64, Legal); 519 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i64, Legal); 520 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f64, Legal); 521 } 522 523 if (Subtarget.hasVectorEnhancements2()) { 524 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); 525 setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal); 526 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); 527 setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal); 528 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); 529 setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal); 530 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal); 531 setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal); 532 533 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); 534 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4f32, Legal); 535 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, Legal); 536 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4f32, Legal); 537 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); 538 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4f32, Legal); 539 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Legal); 540 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4f32, Legal); 541 } 542 543 // Handle floating-point types. 544 for (unsigned I = MVT::FIRST_FP_VALUETYPE; 545 I <= MVT::LAST_FP_VALUETYPE; 546 ++I) { 547 MVT VT = MVT::SimpleValueType(I); 548 if (isTypeLegal(VT)) { 549 // We can use FI for FRINT. 550 setOperationAction(ISD::FRINT, VT, Legal); 551 552 // We can use the extended form of FI for other rounding operations. 553 if (Subtarget.hasFPExtension()) { 554 setOperationAction(ISD::FNEARBYINT, VT, Legal); 555 setOperationAction(ISD::FFLOOR, VT, Legal); 556 setOperationAction(ISD::FCEIL, VT, Legal); 557 setOperationAction(ISD::FTRUNC, VT, Legal); 558 setOperationAction(ISD::FROUND, VT, Legal); 559 } 560 561 // No special instructions for these. 562 setOperationAction(ISD::FSIN, VT, Expand); 563 setOperationAction(ISD::FCOS, VT, Expand); 564 setOperationAction(ISD::FSINCOS, VT, Expand); 565 setOperationAction(ISD::FREM, VT, Expand); 566 setOperationAction(ISD::FPOW, VT, Expand); 567 568 // Special treatment. 569 setOperationAction(ISD::IS_FPCLASS, VT, Custom); 570 571 // Handle constrained floating-point operations. 
572 setOperationAction(ISD::STRICT_FADD, VT, Legal); 573 setOperationAction(ISD::STRICT_FSUB, VT, Legal); 574 setOperationAction(ISD::STRICT_FMUL, VT, Legal); 575 setOperationAction(ISD::STRICT_FDIV, VT, Legal); 576 setOperationAction(ISD::STRICT_FMA, VT, Legal); 577 setOperationAction(ISD::STRICT_FSQRT, VT, Legal); 578 setOperationAction(ISD::STRICT_FRINT, VT, Legal); 579 setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal); 580 setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal); 581 if (Subtarget.hasFPExtension()) { 582 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); 583 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); 584 setOperationAction(ISD::STRICT_FCEIL, VT, Legal); 585 setOperationAction(ISD::STRICT_FROUND, VT, Legal); 586 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); 587 } 588 } 589 } 590 591 // Handle floating-point vector types. 592 if (Subtarget.hasVector()) { 593 // Scalar-to-vector conversion is just a subreg. 594 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); 595 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); 596 597 // Some insertions and extractions can be done directly but others 598 // need to go via integers. 599 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); 600 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); 601 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); 602 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); 603 604 // These operations have direct equivalents. 605 setOperationAction(ISD::FADD, MVT::v2f64, Legal); 606 setOperationAction(ISD::FNEG, MVT::v2f64, Legal); 607 setOperationAction(ISD::FSUB, MVT::v2f64, Legal); 608 setOperationAction(ISD::FMUL, MVT::v2f64, Legal); 609 setOperationAction(ISD::FMA, MVT::v2f64, Legal); 610 setOperationAction(ISD::FDIV, MVT::v2f64, Legal); 611 setOperationAction(ISD::FABS, MVT::v2f64, Legal); 612 setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); 613 setOperationAction(ISD::FRINT, MVT::v2f64, Legal); 614 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); 615 setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); 616 setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); 617 setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); 618 setOperationAction(ISD::FROUND, MVT::v2f64, Legal); 619 620 // Handle constrained floating-point operations. 
621 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); 622 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); 623 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); 624 setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal); 625 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); 626 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); 627 setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal); 628 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal); 629 setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal); 630 setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal); 631 setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal); 632 setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal); 633 634 setOperationAction(ISD::SETCC, MVT::v2f64, Custom); 635 setOperationAction(ISD::SETCC, MVT::v4f32, Custom); 636 setOperationAction(ISD::STRICT_FSETCC, MVT::v2f64, Custom); 637 setOperationAction(ISD::STRICT_FSETCC, MVT::v4f32, Custom); 638 if (Subtarget.hasVectorEnhancements1()) { 639 setOperationAction(ISD::STRICT_FSETCCS, MVT::v2f64, Custom); 640 setOperationAction(ISD::STRICT_FSETCCS, MVT::v4f32, Custom); 641 } 642 } 643 644 // The vector enhancements facility 1 has instructions for these. 645 if (Subtarget.hasVectorEnhancements1()) { 646 setOperationAction(ISD::FADD, MVT::v4f32, Legal); 647 setOperationAction(ISD::FNEG, MVT::v4f32, Legal); 648 setOperationAction(ISD::FSUB, MVT::v4f32, Legal); 649 setOperationAction(ISD::FMUL, MVT::v4f32, Legal); 650 setOperationAction(ISD::FMA, MVT::v4f32, Legal); 651 setOperationAction(ISD::FDIV, MVT::v4f32, Legal); 652 setOperationAction(ISD::FABS, MVT::v4f32, Legal); 653 setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); 654 setOperationAction(ISD::FRINT, MVT::v4f32, Legal); 655 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); 656 setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); 657 setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); 658 setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); 659 setOperationAction(ISD::FROUND, MVT::v4f32, Legal); 660 661 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal); 662 setOperationAction(ISD::FMAXIMUM, MVT::f64, Legal); 663 setOperationAction(ISD::FMINNUM, MVT::f64, Legal); 664 setOperationAction(ISD::FMINIMUM, MVT::f64, Legal); 665 666 setOperationAction(ISD::FMAXNUM, MVT::v2f64, Legal); 667 setOperationAction(ISD::FMAXIMUM, MVT::v2f64, Legal); 668 setOperationAction(ISD::FMINNUM, MVT::v2f64, Legal); 669 setOperationAction(ISD::FMINIMUM, MVT::v2f64, Legal); 670 671 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal); 672 setOperationAction(ISD::FMAXIMUM, MVT::f32, Legal); 673 setOperationAction(ISD::FMINNUM, MVT::f32, Legal); 674 setOperationAction(ISD::FMINIMUM, MVT::f32, Legal); 675 676 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal); 677 setOperationAction(ISD::FMAXIMUM, MVT::v4f32, Legal); 678 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal); 679 setOperationAction(ISD::FMINIMUM, MVT::v4f32, Legal); 680 681 setOperationAction(ISD::FMAXNUM, MVT::f128, Legal); 682 setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal); 683 setOperationAction(ISD::FMINNUM, MVT::f128, Legal); 684 setOperationAction(ISD::FMINIMUM, MVT::f128, Legal); 685 686 // Handle constrained floating-point operations. 
687 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); 688 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); 689 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); 690 setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal); 691 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); 692 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); 693 setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal); 694 setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal); 695 setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal); 696 setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal); 697 setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal); 698 setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal); 699 for (auto VT : { MVT::f32, MVT::f64, MVT::f128, 700 MVT::v4f32, MVT::v2f64 }) { 701 setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal); 702 setOperationAction(ISD::STRICT_FMINNUM, VT, Legal); 703 setOperationAction(ISD::STRICT_FMAXIMUM, VT, Legal); 704 setOperationAction(ISD::STRICT_FMINIMUM, VT, Legal); 705 } 706 } 707 708 // We only have fused f128 multiply-addition on vector registers. 709 if (!Subtarget.hasVectorEnhancements1()) { 710 setOperationAction(ISD::FMA, MVT::f128, Expand); 711 setOperationAction(ISD::STRICT_FMA, MVT::f128, Expand); 712 } 713 714 // We don't have a copysign instruction on vector registers. 715 if (Subtarget.hasVectorEnhancements1()) 716 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand); 717 718 // Needed so that we don't try to implement f128 constant loads using 719 // a load-and-extend of a f80 constant (in cases where the constant 720 // would fit in an f80). 721 for (MVT VT : MVT::fp_valuetypes()) 722 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand); 723 724 // We don't have extending load instruction on vector registers. 725 if (Subtarget.hasVectorEnhancements1()) { 726 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); 727 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); 728 } 729 730 // Floating-point truncation and stores need to be done separately. 731 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 732 setTruncStoreAction(MVT::f128, MVT::f32, Expand); 733 setTruncStoreAction(MVT::f128, MVT::f64, Expand); 734 735 // We have 64-bit FPR<->GPR moves, but need special handling for 736 // 32-bit forms. 737 if (!Subtarget.hasVector()) { 738 setOperationAction(ISD::BITCAST, MVT::i32, Custom); 739 setOperationAction(ISD::BITCAST, MVT::f32, Custom); 740 } 741 742 // VASTART and VACOPY need to deal with the SystemZ-specific varargs 743 // structure, but VAEND is a no-op. 744 setOperationAction(ISD::VASTART, MVT::Other, Custom); 745 setOperationAction(ISD::VACOPY, MVT::Other, Custom); 746 setOperationAction(ISD::VAEND, MVT::Other, Expand); 747 748 if (Subtarget.isTargetzOS()) { 749 // Handle address space casts between mixed sized pointers. 750 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); 751 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); 752 } 753 754 setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom); 755 756 // Codes for which we want to perform some z-specific combinations. 
757 setTargetDAGCombine({ISD::ZERO_EXTEND, 758 ISD::SIGN_EXTEND, 759 ISD::SIGN_EXTEND_INREG, 760 ISD::LOAD, 761 ISD::STORE, 762 ISD::VECTOR_SHUFFLE, 763 ISD::EXTRACT_VECTOR_ELT, 764 ISD::FP_ROUND, 765 ISD::STRICT_FP_ROUND, 766 ISD::FP_EXTEND, 767 ISD::SINT_TO_FP, 768 ISD::UINT_TO_FP, 769 ISD::STRICT_FP_EXTEND, 770 ISD::BSWAP, 771 ISD::SDIV, 772 ISD::UDIV, 773 ISD::SREM, 774 ISD::UREM, 775 ISD::INTRINSIC_VOID, 776 ISD::INTRINSIC_W_CHAIN}); 777 778 // Handle intrinsics. 779 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); 780 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 781 782 // We're not using SJLJ for exception handling, but they're implemented 783 // solely to support use of __builtin_setjmp / __builtin_longjmp. 784 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 785 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 786 787 // We want to use MVC in preference to even a single load/store pair. 788 MaxStoresPerMemcpy = Subtarget.hasVector() ? 2 : 0; 789 MaxStoresPerMemcpyOptSize = 0; 790 791 // The main memset sequence is a byte store followed by an MVC. 792 // Two STC or MV..I stores win over that, but the kind of fused stores 793 // generated by target-independent code don't when the byte value is 794 // variable. E.g. "STC <reg>;MHI <reg>,257;STH <reg>" is not better 795 // than "STC;MVC". Handle the choice in target-specific code instead. 796 MaxStoresPerMemset = Subtarget.hasVector() ? 2 : 0; 797 MaxStoresPerMemsetOptSize = 0; 798 799 // Default to having -disable-strictnode-mutation on 800 IsStrictFPEnabled = true; 801 802 if (Subtarget.isTargetzOS()) { 803 struct RTLibCallMapping { 804 RTLIB::Libcall Code; 805 const char *Name; 806 }; 807 static RTLibCallMapping RTLibCallCommon[] = { 808 #define HANDLE_LIBCALL(code, name) {RTLIB::code, name}, 809 #include "ZOSLibcallNames.def" 810 }; 811 for (auto &E : RTLibCallCommon) 812 setLibcallName(E.Code, E.Name); 813 } 814 } 815 816 bool SystemZTargetLowering::useSoftFloat() const { 817 return Subtarget.hasSoftFloat(); 818 } 819 820 EVT SystemZTargetLowering::getSetCCResultType(const DataLayout &DL, 821 LLVMContext &, EVT VT) const { 822 if (!VT.isVector()) 823 return MVT::i32; 824 return VT.changeVectorElementTypeToInteger(); 825 } 826 827 bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd( 828 const MachineFunction &MF, EVT VT) const { 829 if (useSoftFloat()) 830 return false; 831 832 VT = VT.getScalarType(); 833 834 if (!VT.isSimple()) 835 return false; 836 837 switch (VT.getSimpleVT().SimpleTy) { 838 case MVT::f32: 839 case MVT::f64: 840 return true; 841 case MVT::f128: 842 return Subtarget.hasVectorEnhancements1(); 843 default: 844 break; 845 } 846 847 return false; 848 } 849 850 // Return true if the constant can be generated with a vector instruction, 851 // such as VGM, VGMB or VREPI. 852 bool SystemZVectorConstantInfo::isVectorConstantLegal( 853 const SystemZSubtarget &Subtarget) { 854 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 855 if (!Subtarget.hasVector() || 856 (isFP128 && !Subtarget.hasVectorEnhancements1())) 857 return false; 858 859 // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- 860 // preferred way of creating all-zero and all-one vectors so give it 861 // priority over other methods below. 
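// (Illustrative note, not from the original source: any constant whose
// bytes are each either 0x00 or 0xff is representable here; the all-ones
// vector yields Mask == 0xffff and the all-zeros vector yields Mask == 0.)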
862 unsigned Mask = 0; 863 unsigned I = 0; 864 for (; I < SystemZ::VectorBytes; ++I) { 865 uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); 866 if (Byte == 0xff) 867 Mask |= 1ULL << I; 868 else if (Byte != 0) 869 break; 870 } 871 if (I == SystemZ::VectorBytes) { 872 Opcode = SystemZISD::BYTE_MASK; 873 OpVals.push_back(Mask); 874 VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); 875 return true; 876 } 877 878 if (SplatBitSize > 64) 879 return false; 880 881 auto tryValue = [&](uint64_t Value) -> bool { 882 // Try VECTOR REPLICATE IMMEDIATE 883 int64_t SignedValue = SignExtend64(Value, SplatBitSize); 884 if (isInt<16>(SignedValue)) { 885 OpVals.push_back(((unsigned) SignedValue)); 886 Opcode = SystemZISD::REPLICATE; 887 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), 888 SystemZ::VectorBits / SplatBitSize); 889 return true; 890 } 891 // Try VECTOR GENERATE MASK 892 unsigned Start, End; 893 if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { 894 // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 895 // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for 896 // an SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). 897 OpVals.push_back(Start - (64 - SplatBitSize)); 898 OpVals.push_back(End - (64 - SplatBitSize)); 899 Opcode = SystemZISD::ROTATE_MASK; 900 VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), 901 SystemZ::VectorBits / SplatBitSize); 902 return true; 903 } 904 return false; 905 }; 906 907 // First try assuming that any undefined bits above the highest set bit 908 // and below the lowest set bit are 1s. This increases the likelihood of 909 // being able to use a sign-extended element value in VECTOR REPLICATE 910 // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. 911 uint64_t SplatBitsZ = SplatBits.getZExtValue(); 912 uint64_t SplatUndefZ = SplatUndef.getZExtValue(); 913 unsigned LowerBits = llvm::countr_zero(SplatBitsZ); 914 unsigned UpperBits = llvm::countl_zero(SplatBitsZ); 915 uint64_t Lower = SplatUndefZ & maskTrailingOnes<uint64_t>(LowerBits); 916 uint64_t Upper = SplatUndefZ & maskLeadingOnes<uint64_t>(UpperBits); 917 if (tryValue(SplatBitsZ | Upper | Lower)) 918 return true; 919 920 // Now try assuming that any undefined bits between the first and 921 // last defined set bits are set. This increases the chances of 922 // using a non-wraparound mask. 923 uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; 924 return tryValue(SplatBitsZ | Middle); 925 } 926 927 SystemZVectorConstantInfo::SystemZVectorConstantInfo(APInt IntImm) { 928 if (IntImm.isSingleWord()) { 929 IntBits = APInt(128, IntImm.getZExtValue()); 930 IntBits <<= (SystemZ::VectorBits - IntImm.getBitWidth()); 931 } else 932 IntBits = IntImm; 933 assert(IntBits.getBitWidth() == 128 && "Unsupported APInt."); 934 935 // Find the smallest splat. 936 SplatBits = IntImm; 937 unsigned Width = SplatBits.getBitWidth(); 938 while (Width > 8) { 939 unsigned HalfSize = Width / 2; 940 APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); 941 APInt LowValue = SplatBits.trunc(HalfSize); 942 943 // If the two halves do not match, stop here. 944 if (HighValue != LowValue || 8 > HalfSize) 945 break; 946 947 SplatBits = HighValue; 948 Width = HalfSize; 949 } 950 SplatUndef = 0; 951 SplatBitSize = Width; 952 } 953 954 SystemZVectorConstantInfo::SystemZVectorConstantInfo(BuildVectorSDNode *BVN) { 955 assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); 956 bool HasAnyUndefs; 957 958 // Get IntBits by finding the 128 bit splat. 
959 BVN->isConstantSplat(IntBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, 960 true); 961 962 // Get SplatBits by finding the 8 bit or greater splat. 963 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, 964 true); 965 } 966 967 bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 968 bool ForCodeSize) const { 969 // We can load zero using LZ?R and negative zero using LZ?R;LC?BR. 970 if (Imm.isZero() || Imm.isNegZero()) 971 return true; 972 973 return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); 974 } 975 976 MachineBasicBlock * 977 SystemZTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, 978 MachineBasicBlock *MBB) const { 979 DebugLoc DL = MI.getDebugLoc(); 980 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 981 const SystemZRegisterInfo *TRI = Subtarget.getRegisterInfo(); 982 983 MachineFunction *MF = MBB->getParent(); 984 MachineRegisterInfo &MRI = MF->getRegInfo(); 985 986 const BasicBlock *BB = MBB->getBasicBlock(); 987 MachineFunction::iterator I = ++MBB->getIterator(); 988 989 Register DstReg = MI.getOperand(0).getReg(); 990 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 991 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); 992 (void)TRI; 993 Register mainDstReg = MRI.createVirtualRegister(RC); 994 Register restoreDstReg = MRI.createVirtualRegister(RC); 995 996 MVT PVT = getPointerTy(MF->getDataLayout()); 997 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); 998 // For v = setjmp(buf), we generate. 999 // Algorithm: 1000 // 1001 // --------- 1002 // | thisMBB | 1003 // --------- 1004 // | 1005 // ------------------------ 1006 // | | 1007 // ---------- --------------- 1008 // | mainMBB | | restoreMBB | 1009 // | v = 0 | | v = 1 | 1010 // ---------- --------------- 1011 // | | 1012 // ------------------------- 1013 // | 1014 // ----------------------------- 1015 // | sinkMBB | 1016 // | phi(v_mainMBB,v_restoreMBB) | 1017 // ----------------------------- 1018 // thisMBB: 1019 // buf[FPOffset] = Frame Pointer if hasFP. 1020 // buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB. 1021 // buf[BCOffset] = Backchain value if building with -mbackchain. 1022 // buf[SPOffset] = Stack Pointer. 1023 // buf[LPOffset] = We never write this slot with R13, gcc stores R13 always. 1024 // SjLjSetup restoreMBB 1025 // mainMBB: 1026 // v_main = 0 1027 // sinkMBB: 1028 // v = phi(v_main, v_restore) 1029 // restoreMBB: 1030 // v_restore = 1 1031 1032 MachineBasicBlock *thisMBB = MBB; 1033 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 1034 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 1035 MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); 1036 1037 MF->insert(I, mainMBB); 1038 MF->insert(I, sinkMBB); 1039 MF->push_back(restoreMBB); 1040 restoreMBB->setMachineBlockAddressTaken(); 1041 1042 MachineInstrBuilder MIB; 1043 1044 // Transfer the remainder of BB and its successor edges to sinkMBB. 1045 sinkMBB->splice(sinkMBB->begin(), MBB, 1046 std::next(MachineBasicBlock::iterator(MI)), MBB->end()); 1047 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 1048 1049 // thisMBB: 1050 const int64_t FPOffset = 0; // Slot 1. 1051 const int64_t LabelOffset = 1 * PVT.getStoreSize(); // Slot 2. 1052 const int64_t BCOffset = 2 * PVT.getStoreSize(); // Slot 3. 1053 const int64_t SPOffset = 3 * PVT.getStoreSize(); // Slot 4. 1054 1055 // Buf address. 
1056 Register BufReg = MI.getOperand(1).getReg(); 1057 1058 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 1059 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 1060 1061 // Prepare IP for longjmp. 1062 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::LARL), LabelReg) 1063 .addMBB(restoreMBB); 1064 // Store IP for return from jmp, slot 2, offset = 1. 1065 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG)) 1066 .addReg(LabelReg) 1067 .addReg(BufReg) 1068 .addImm(LabelOffset) 1069 .addReg(0); 1070 1071 auto *SpecialRegs = Subtarget.getSpecialRegisters(); 1072 bool HasFP = Subtarget.getFrameLowering()->hasFP(*MF); 1073 if (HasFP) { 1074 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG)) 1075 .addReg(SpecialRegs->getFramePointerRegister()) 1076 .addReg(BufReg) 1077 .addImm(FPOffset) 1078 .addReg(0); 1079 } 1080 1081 // Store SP. 1082 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG)) 1083 .addReg(SpecialRegs->getStackPointerRegister()) 1084 .addReg(BufReg) 1085 .addImm(SPOffset) 1086 .addReg(0); 1087 1088 // Slot 3(Offset = 2) Backchain value (if building with -mbackchain). 1089 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain(); 1090 if (BackChain) { 1091 Register BCReg = MRI.createVirtualRegister(PtrRC); 1092 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 1093 MIB = BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::LG), BCReg) 1094 .addReg(SpecialRegs->getStackPointerRegister()) 1095 .addImm(TFL->getBackchainOffset(*MF)) 1096 .addReg(0); 1097 1098 BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::STG)) 1099 .addReg(BCReg) 1100 .addReg(BufReg) 1101 .addImm(BCOffset) 1102 .addReg(0); 1103 } 1104 1105 // Setup. 1106 MIB = BuildMI(*thisMBB, MI, DL, TII->get(SystemZ::EH_SjLj_Setup)) 1107 .addMBB(restoreMBB); 1108 1109 const SystemZRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1110 MIB.addRegMask(RegInfo->getNoPreservedMask()); 1111 1112 thisMBB->addSuccessor(mainMBB); 1113 thisMBB->addSuccessor(restoreMBB); 1114 1115 // mainMBB: 1116 BuildMI(mainMBB, DL, TII->get(SystemZ::LHI), mainDstReg).addImm(0); 1117 mainMBB->addSuccessor(sinkMBB); 1118 1119 // sinkMBB: 1120 BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(SystemZ::PHI), DstReg) 1121 .addReg(mainDstReg) 1122 .addMBB(mainMBB) 1123 .addReg(restoreDstReg) 1124 .addMBB(restoreMBB); 1125 1126 // restoreMBB. 
1127 BuildMI(restoreMBB, DL, TII->get(SystemZ::LHI), restoreDstReg).addImm(1); 1128 BuildMI(restoreMBB, DL, TII->get(SystemZ::J)).addMBB(sinkMBB); 1129 restoreMBB->addSuccessor(sinkMBB); 1130 1131 MI.eraseFromParent(); 1132 1133 return sinkMBB; 1134 } 1135 1136 MachineBasicBlock * 1137 SystemZTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, 1138 MachineBasicBlock *MBB) const { 1139 1140 DebugLoc DL = MI.getDebugLoc(); 1141 const TargetInstrInfo *TII = Subtarget.getInstrInfo(); 1142 1143 MachineFunction *MF = MBB->getParent(); 1144 MachineRegisterInfo &MRI = MF->getRegInfo(); 1145 1146 MVT PVT = getPointerTy(MF->getDataLayout()); 1147 assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); 1148 Register BufReg = MI.getOperand(0).getReg(); 1149 const TargetRegisterClass *RC = MRI.getRegClass(BufReg); 1150 auto *SpecialRegs = Subtarget.getSpecialRegisters(); 1151 1152 Register Tmp = MRI.createVirtualRegister(RC); 1153 Register BCReg = MRI.createVirtualRegister(RC); 1154 1155 MachineInstrBuilder MIB; 1156 1157 const int64_t FPOffset = 0; 1158 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 1159 const int64_t BCOffset = 2 * PVT.getStoreSize(); 1160 const int64_t SPOffset = 3 * PVT.getStoreSize(); 1161 const int64_t LPOffset = 4 * PVT.getStoreSize(); 1162 1163 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), Tmp) 1164 .addReg(BufReg) 1165 .addImm(LabelOffset) 1166 .addReg(0); 1167 1168 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), 1169 SpecialRegs->getFramePointerRegister()) 1170 .addReg(BufReg) 1171 .addImm(FPOffset) 1172 .addReg(0); 1173 1174 // We restore R13 even though we never stored it in setjmp from llvm, 1175 // since gcc always stores R13 in builtin_setjmp. We may be dealing with 1176 // mixed code: gcc setjmp and llvm longjmp. 1177 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), SystemZ::R13D) 1178 .addReg(BufReg) 1179 .addImm(LPOffset) 1180 .addReg(0); 1181 1182 bool BackChain = MF->getSubtarget<SystemZSubtarget>().hasBackChain(); 1183 if (BackChain) { 1184 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), BCReg) 1185 .addReg(BufReg) 1186 .addImm(BCOffset) 1187 .addReg(0); 1188 } 1189 1190 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::LG), 1191 SpecialRegs->getStackPointerRegister()) 1192 .addReg(BufReg) 1193 .addImm(SPOffset) 1194 .addReg(0); 1195 1196 if (BackChain) { 1197 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 1198 BuildMI(*MBB, MI, DL, TII->get(SystemZ::STG)) 1199 .addReg(BCReg) 1200 .addReg(SpecialRegs->getStackPointerRegister()) 1201 .addImm(TFL->getBackchainOffset(*MF)) 1202 .addReg(0); 1203 } 1204 1205 MIB = BuildMI(*MBB, MI, DL, TII->get(SystemZ::BR)).addReg(Tmp); 1206 1207 MI.eraseFromParent(); 1208 return MBB; 1209 } 1210 1211 /// Returns true if stack probing through inline assembly is requested. 1212 bool SystemZTargetLowering::hasInlineStackProbe(const MachineFunction &MF) const { 1213 // If the function specifically requests inline stack probes, emit them. 
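// (Illustrative note, not from the original source: only the attribute
// value "probe-stack"="inline-asm" enables inline probes here; any other
// value, or no attribute at all, makes this function return false.)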
1214 if (MF.getFunction().hasFnAttribute("probe-stack")) 1215 return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == 1216 "inline-asm"; 1217 return false; 1218 } 1219 1220 TargetLowering::AtomicExpansionKind 1221 SystemZTargetLowering::shouldCastAtomicLoadInIR(LoadInst *LI) const { 1222 return AtomicExpansionKind::None; 1223 } 1224 1225 TargetLowering::AtomicExpansionKind 1226 SystemZTargetLowering::shouldCastAtomicStoreInIR(StoreInst *SI) const { 1227 return AtomicExpansionKind::None; 1228 } 1229 1230 TargetLowering::AtomicExpansionKind 1231 SystemZTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 1232 // Don't expand subword operations as they require special treatment. 1233 if (RMW->getType()->isIntegerTy(8) || RMW->getType()->isIntegerTy(16)) 1234 return AtomicExpansionKind::None; 1235 1236 // Don't expand if there is a target instruction available. 1237 if (Subtarget.hasInterlockedAccess1() && 1238 (RMW->getType()->isIntegerTy(32) || RMW->getType()->isIntegerTy(64)) && 1239 (RMW->getOperation() == AtomicRMWInst::BinOp::Add || 1240 RMW->getOperation() == AtomicRMWInst::BinOp::Sub || 1241 RMW->getOperation() == AtomicRMWInst::BinOp::And || 1242 RMW->getOperation() == AtomicRMWInst::BinOp::Or || 1243 RMW->getOperation() == AtomicRMWInst::BinOp::Xor)) 1244 return AtomicExpansionKind::None; 1245 1246 return AtomicExpansionKind::CmpXChg; 1247 } 1248 1249 bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 1250 // We can use CGFI or CLGFI. 1251 return isInt<32>(Imm) || isUInt<32>(Imm); 1252 } 1253 1254 bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { 1255 // We can use ALGFI or SLGFI. 1256 return isUInt<32>(Imm) || isUInt<32>(-Imm); 1257 } 1258 1259 bool SystemZTargetLowering::allowsMisalignedMemoryAccesses( 1260 EVT VT, unsigned, Align, MachineMemOperand::Flags, unsigned *Fast) const { 1261 // Unaligned accesses should never be slower than the expanded version. 1262 // We check specifically for aligned accesses in the few cases where 1263 // they are required. 1264 if (Fast) 1265 *Fast = 1; 1266 return true; 1267 } 1268 1269 // Information about the addressing mode for a memory access. 1270 struct AddressingMode { 1271 // True if a long displacement is supported. 1272 bool LongDisplacement; 1273 1274 // True if use of index register is supported. 1275 bool IndexReg; 1276 1277 AddressingMode(bool LongDispl, bool IdxReg) : 1278 LongDisplacement(LongDispl), IndexReg(IdxReg) {} 1279 }; 1280 1281 // Return the desired addressing mode for a Load which has only one use (in 1282 // the same block) which is a Store. 1283 static AddressingMode getLoadStoreAddrMode(bool HasVector, 1284 Type *Ty) { 1285 // With vector support a Load->Store combination may be combined to either 1286 // an MVC or vector operations and it seems to work best to allow the 1287 // vector addressing mode. 1288 if (HasVector) 1289 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); 1290 1291 // Otherwise only the MVC case is special. 1292 bool MVC = Ty->isIntegerTy(8); 1293 return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/); 1294 } 1295 1296 // Return the addressing mode which seems most desirable given an LLVM 1297 // Instruction pointer. 
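// (Illustrative note, not from the original source: when the instruction is
// a memset/memcpy/memmove intrinsic, the most restrictive mode is returned
// below, i.e. short displacement and no index register, since such calls are
// typically lowered to MVC/XC-style storage-to-storage operations.)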
1298 static AddressingMode 1299 supportedAddressingMode(Instruction *I, bool HasVector) { 1300 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 1301 switch (II->getIntrinsicID()) { 1302 default: break; 1303 case Intrinsic::memset: 1304 case Intrinsic::memmove: 1305 case Intrinsic::memcpy: 1306 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); 1307 } 1308 } 1309 1310 if (isa<LoadInst>(I) && I->hasOneUse()) { 1311 auto *SingleUser = cast<Instruction>(*I->user_begin()); 1312 if (SingleUser->getParent() == I->getParent()) { 1313 if (isa<ICmpInst>(SingleUser)) { 1314 if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1))) 1315 if (C->getBitWidth() <= 64 && 1316 (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))) 1317 // Comparison of memory with 16 bit signed / unsigned immediate 1318 return AddressingMode(false/*LongDispl*/, false/*IdxReg*/); 1319 } else if (isa<StoreInst>(SingleUser)) 1320 // Load->Store 1321 return getLoadStoreAddrMode(HasVector, I->getType()); 1322 } 1323 } else if (auto *StoreI = dyn_cast<StoreInst>(I)) { 1324 if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand())) 1325 if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent()) 1326 // Load->Store 1327 return getLoadStoreAddrMode(HasVector, LoadI->getType()); 1328 } 1329 1330 if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) { 1331 1332 // * Use LDE instead of LE/LEY for z13 to avoid partial register 1333 // dependencies (LDE only supports small offsets). 1334 // * Utilize the vector registers to hold floating point 1335 // values (vector load / store instructions only support small 1336 // offsets). 1337 1338 Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() : 1339 I->getOperand(0)->getType()); 1340 bool IsFPAccess = MemAccessTy->isFloatingPointTy(); 1341 bool IsVectorAccess = MemAccessTy->isVectorTy(); 1342 1343 // A store of an extracted vector element will be combined into a VSTE type 1344 // instruction. 1345 if (!IsVectorAccess && isa<StoreInst>(I)) { 1346 Value *DataOp = I->getOperand(0); 1347 if (isa<ExtractElementInst>(DataOp)) 1348 IsVectorAccess = true; 1349 } 1350 1351 // A load which gets inserted into a vector element will be combined into a 1352 // VLE type instruction. 1353 if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) { 1354 User *LoadUser = *I->user_begin(); 1355 if (isa<InsertElementInst>(LoadUser)) 1356 IsVectorAccess = true; 1357 } 1358 1359 if (IsFPAccess || IsVectorAccess) 1360 return AddressingMode(false/*LongDispl*/, true/*IdxReg*/); 1361 } 1362 1363 return AddressingMode(true/*LongDispl*/, true/*IdxReg*/); 1364 } 1365 1366 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL, 1367 const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const { 1368 // Punt on globals for now, although they can be used in limited 1369 // RELATIVE LONG cases. 1370 if (AM.BaseGV) 1371 return false; 1372 1373 // Require a 20-bit signed offset. 1374 if (!isInt<20>(AM.BaseOffs)) 1375 return false; 1376 1377 bool RequireD12 = 1378 Subtarget.hasVector() && (Ty->isVectorTy() || Ty->isIntegerTy(128)); 1379 AddressingMode SupportedAM(!RequireD12, true); 1380 if (I != nullptr) 1381 SupportedAM = supportedAddressingMode(I, Subtarget.hasVector()); 1382 1383 if (!SupportedAM.LongDisplacement && !isUInt<12>(AM.BaseOffs)) 1384 return false; 1385 1386 if (!SupportedAM.IndexReg) 1387 // No indexing allowed. 1388 return AM.Scale == 0; 1389 else 1390 // Indexing is OK but no scale factor can be applied. 
1391 return AM.Scale == 0 || AM.Scale == 1; 1392 } 1393 1394 bool SystemZTargetLowering::findOptimalMemOpLowering( 1395 std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS, 1396 unsigned SrcAS, const AttributeList &FuncAttributes) const { 1397 const int MVCFastLen = 16; 1398 1399 if (Limit != ~unsigned(0)) { 1400 // Don't expand Op into scalar loads/stores in these cases: 1401 if (Op.isMemcpy() && Op.allowOverlap() && Op.size() <= MVCFastLen) 1402 return false; // Small memcpy: Use MVC 1403 if (Op.isMemset() && Op.size() - 1 <= MVCFastLen) 1404 return false; // Small memset (first byte with STC/MVI): Use MVC 1405 if (Op.isZeroMemset()) 1406 return false; // Memset zero: Use XC 1407 } 1408 1409 return TargetLowering::findOptimalMemOpLowering(MemOps, Limit, Op, DstAS, 1410 SrcAS, FuncAttributes); 1411 } 1412 1413 EVT SystemZTargetLowering::getOptimalMemOpType(const MemOp &Op, 1414 const AttributeList &FuncAttributes) const { 1415 return Subtarget.hasVector() ? MVT::v2i64 : MVT::Other; 1416 } 1417 1418 bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const { 1419 if (!FromType->isIntegerTy() || !ToType->isIntegerTy()) 1420 return false; 1421 unsigned FromBits = FromType->getPrimitiveSizeInBits().getFixedValue(); 1422 unsigned ToBits = ToType->getPrimitiveSizeInBits().getFixedValue(); 1423 return FromBits > ToBits; 1424 } 1425 1426 bool SystemZTargetLowering::isTruncateFree(EVT FromVT, EVT ToVT) const { 1427 if (!FromVT.isInteger() || !ToVT.isInteger()) 1428 return false; 1429 unsigned FromBits = FromVT.getFixedSizeInBits(); 1430 unsigned ToBits = ToVT.getFixedSizeInBits(); 1431 return FromBits > ToBits; 1432 } 1433 1434 //===----------------------------------------------------------------------===// 1435 // Inline asm support 1436 //===----------------------------------------------------------------------===// 1437 1438 TargetLowering::ConstraintType 1439 SystemZTargetLowering::getConstraintType(StringRef Constraint) const { 1440 if (Constraint.size() == 1) { 1441 switch (Constraint[0]) { 1442 case 'a': // Address register 1443 case 'd': // Data register (equivalent to 'r') 1444 case 'f': // Floating-point register 1445 case 'h': // High-part register 1446 case 'r': // General-purpose register 1447 case 'v': // Vector register 1448 return C_RegisterClass; 1449 1450 case 'Q': // Memory with base and unsigned 12-bit displacement 1451 case 'R': // Likewise, plus an index 1452 case 'S': // Memory with base and signed 20-bit displacement 1453 case 'T': // Likewise, plus an index 1454 case 'm': // Equivalent to 'T'. 
1455 return C_Memory; 1456 1457 case 'I': // Unsigned 8-bit constant 1458 case 'J': // Unsigned 12-bit constant 1459 case 'K': // Signed 16-bit constant 1460 case 'L': // Signed 20-bit displacement (on all targets we support) 1461 case 'M': // 0x7fffffff 1462 return C_Immediate; 1463 1464 default: 1465 break; 1466 } 1467 } else if (Constraint.size() == 2 && Constraint[0] == 'Z') { 1468 switch (Constraint[1]) { 1469 case 'Q': // Address with base and unsigned 12-bit displacement 1470 case 'R': // Likewise, plus an index 1471 case 'S': // Address with base and signed 20-bit displacement 1472 case 'T': // Likewise, plus an index 1473 return C_Address; 1474 1475 default: 1476 break; 1477 } 1478 } 1479 return TargetLowering::getConstraintType(Constraint); 1480 } 1481 1482 TargetLowering::ConstraintWeight SystemZTargetLowering:: 1483 getSingleConstraintMatchWeight(AsmOperandInfo &info, 1484 const char *constraint) const { 1485 ConstraintWeight weight = CW_Invalid; 1486 Value *CallOperandVal = info.CallOperandVal; 1487 // If we don't have a value, we can't do a match, 1488 // but allow it at the lowest weight. 1489 if (!CallOperandVal) 1490 return CW_Default; 1491 Type *type = CallOperandVal->getType(); 1492 // Look at the constraint type. 1493 switch (*constraint) { 1494 default: 1495 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 1496 break; 1497 1498 case 'a': // Address register 1499 case 'd': // Data register (equivalent to 'r') 1500 case 'h': // High-part register 1501 case 'r': // General-purpose register 1502 weight = CallOperandVal->getType()->isIntegerTy() ? CW_Register : CW_Default; 1503 break; 1504 1505 case 'f': // Floating-point register 1506 if (!useSoftFloat()) 1507 weight = type->isFloatingPointTy() ? CW_Register : CW_Default; 1508 break; 1509 1510 case 'v': // Vector register 1511 if (Subtarget.hasVector()) 1512 weight = (type->isVectorTy() || type->isFloatingPointTy()) ? CW_Register 1513 : CW_Default; 1514 break; 1515 1516 case 'I': // Unsigned 8-bit constant 1517 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1518 if (isUInt<8>(C->getZExtValue())) 1519 weight = CW_Constant; 1520 break; 1521 1522 case 'J': // Unsigned 12-bit constant 1523 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1524 if (isUInt<12>(C->getZExtValue())) 1525 weight = CW_Constant; 1526 break; 1527 1528 case 'K': // Signed 16-bit constant 1529 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1530 if (isInt<16>(C->getSExtValue())) 1531 weight = CW_Constant; 1532 break; 1533 1534 case 'L': // Signed 20-bit displacement (on all targets we support) 1535 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1536 if (isInt<20>(C->getSExtValue())) 1537 weight = CW_Constant; 1538 break; 1539 1540 case 'M': // 0x7fffffff 1541 if (auto *C = dyn_cast<ConstantInt>(CallOperandVal)) 1542 if (C->getZExtValue() == 0x7fffffff) 1543 weight = CW_Constant; 1544 break; 1545 } 1546 return weight; 1547 } 1548 1549 // Parse a "{tNNN}" register constraint for which the register type "t" 1550 // has already been verified. MC is the class associated with "t" and 1551 // Map maps 0-based register numbers to LLVM register numbers. 
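// For example, a "{r7}" constraint on a 64-bit operand would be looked up in
// SystemZMC::GR64Regs and presumably resolve to R7D; the actual result is
// whatever the supplied Map and register class provide.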
1552 static std::pair<unsigned, const TargetRegisterClass *> 1553 parseRegisterNumber(StringRef Constraint, const TargetRegisterClass *RC, 1554 const unsigned *Map, unsigned Size) { 1555 assert(*(Constraint.end()-1) == '}' && "Missing '}'"); 1556 if (isdigit(Constraint[2])) { 1557 unsigned Index; 1558 bool Failed = 1559 Constraint.slice(2, Constraint.size() - 1).getAsInteger(10, Index); 1560 if (!Failed && Index < Size && Map[Index]) 1561 return std::make_pair(Map[Index], RC); 1562 } 1563 return std::make_pair(0U, nullptr); 1564 } 1565 1566 std::pair<unsigned, const TargetRegisterClass *> 1567 SystemZTargetLowering::getRegForInlineAsmConstraint( 1568 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { 1569 if (Constraint.size() == 1) { 1570 // GCC Constraint Letters 1571 switch (Constraint[0]) { 1572 default: break; 1573 case 'd': // Data register (equivalent to 'r') 1574 case 'r': // General-purpose register 1575 if (VT.getSizeInBits() == 64) 1576 return std::make_pair(0U, &SystemZ::GR64BitRegClass); 1577 else if (VT.getSizeInBits() == 128) 1578 return std::make_pair(0U, &SystemZ::GR128BitRegClass); 1579 return std::make_pair(0U, &SystemZ::GR32BitRegClass); 1580 1581 case 'a': // Address register 1582 if (VT == MVT::i64) 1583 return std::make_pair(0U, &SystemZ::ADDR64BitRegClass); 1584 else if (VT == MVT::i128) 1585 return std::make_pair(0U, &SystemZ::ADDR128BitRegClass); 1586 return std::make_pair(0U, &SystemZ::ADDR32BitRegClass); 1587 1588 case 'h': // High-part register (an LLVM extension) 1589 return std::make_pair(0U, &SystemZ::GRH32BitRegClass); 1590 1591 case 'f': // Floating-point register 1592 if (!useSoftFloat()) { 1593 if (VT.getSizeInBits() == 64) 1594 return std::make_pair(0U, &SystemZ::FP64BitRegClass); 1595 else if (VT.getSizeInBits() == 128) 1596 return std::make_pair(0U, &SystemZ::FP128BitRegClass); 1597 return std::make_pair(0U, &SystemZ::FP32BitRegClass); 1598 } 1599 break; 1600 1601 case 'v': // Vector register 1602 if (Subtarget.hasVector()) { 1603 if (VT.getSizeInBits() == 32) 1604 return std::make_pair(0U, &SystemZ::VR32BitRegClass); 1605 if (VT.getSizeInBits() == 64) 1606 return std::make_pair(0U, &SystemZ::VR64BitRegClass); 1607 return std::make_pair(0U, &SystemZ::VR128BitRegClass); 1608 } 1609 break; 1610 } 1611 } 1612 if (Constraint.starts_with("{")) { 1613 1614 // A clobber constraint (e.g. ~{f0}) will have MVT::Other which is illegal 1615 // to check the size on. 1616 auto getVTSizeInBits = [&VT]() { 1617 return VT == MVT::Other ? 0 : VT.getSizeInBits(); 1618 }; 1619 1620 // We need to override the default register parsing for GPRs and FPRs 1621 // because the interpretation depends on VT. The internal names of 1622 // the registers are also different from the external names 1623 // (F0D and F0S instead of F0, etc.). 
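// For instance, "{r5}" with an i32 operand should resolve to the GR32
// register R5L rather than the GR64 register R5D, so the register class is
// chosen from the operand's VT below.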
1624 if (Constraint[1] == 'r') { 1625 if (getVTSizeInBits() == 32) 1626 return parseRegisterNumber(Constraint, &SystemZ::GR32BitRegClass, 1627 SystemZMC::GR32Regs, 16); 1628 if (getVTSizeInBits() == 128) 1629 return parseRegisterNumber(Constraint, &SystemZ::GR128BitRegClass, 1630 SystemZMC::GR128Regs, 16); 1631 return parseRegisterNumber(Constraint, &SystemZ::GR64BitRegClass, 1632 SystemZMC::GR64Regs, 16); 1633 } 1634 if (Constraint[1] == 'f') { 1635 if (useSoftFloat()) 1636 return std::make_pair( 1637 0u, static_cast<const TargetRegisterClass *>(nullptr)); 1638 if (getVTSizeInBits() == 32) 1639 return parseRegisterNumber(Constraint, &SystemZ::FP32BitRegClass, 1640 SystemZMC::FP32Regs, 16); 1641 if (getVTSizeInBits() == 128) 1642 return parseRegisterNumber(Constraint, &SystemZ::FP128BitRegClass, 1643 SystemZMC::FP128Regs, 16); 1644 return parseRegisterNumber(Constraint, &SystemZ::FP64BitRegClass, 1645 SystemZMC::FP64Regs, 16); 1646 } 1647 if (Constraint[1] == 'v') { 1648 if (!Subtarget.hasVector()) 1649 return std::make_pair( 1650 0u, static_cast<const TargetRegisterClass *>(nullptr)); 1651 if (getVTSizeInBits() == 32) 1652 return parseRegisterNumber(Constraint, &SystemZ::VR32BitRegClass, 1653 SystemZMC::VR32Regs, 32); 1654 if (getVTSizeInBits() == 64) 1655 return parseRegisterNumber(Constraint, &SystemZ::VR64BitRegClass, 1656 SystemZMC::VR64Regs, 32); 1657 return parseRegisterNumber(Constraint, &SystemZ::VR128BitRegClass, 1658 SystemZMC::VR128Regs, 32); 1659 } 1660 } 1661 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); 1662 } 1663 1664 // FIXME? Maybe this could be a TableGen attribute on some registers and 1665 // this table could be generated automatically from RegInfo. 1666 Register 1667 SystemZTargetLowering::getRegisterByName(const char *RegName, LLT VT, 1668 const MachineFunction &MF) const { 1669 Register Reg = 1670 StringSwitch<Register>(RegName) 1671 .Case("r4", Subtarget.isTargetXPLINK64() ? SystemZ::R4D 1672 : SystemZ::NoRegister) 1673 .Case("r15", 1674 Subtarget.isTargetELF() ? SystemZ::R15D : SystemZ::NoRegister) 1675 .Default(SystemZ::NoRegister); 1676 1677 if (Reg) 1678 return Reg; 1679 report_fatal_error("Invalid register name global variable"); 1680 } 1681 1682 Register SystemZTargetLowering::getExceptionPointerRegister( 1683 const Constant *PersonalityFn) const { 1684 return Subtarget.isTargetXPLINK64() ? SystemZ::R1D : SystemZ::R6D; 1685 } 1686 1687 Register SystemZTargetLowering::getExceptionSelectorRegister( 1688 const Constant *PersonalityFn) const { 1689 return Subtarget.isTargetXPLINK64() ? SystemZ::R2D : SystemZ::R7D; 1690 } 1691 1692 void SystemZTargetLowering::LowerAsmOperandForConstraint( 1693 SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops, 1694 SelectionDAG &DAG) const { 1695 // Only support length 1 constraints for now. 
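// Each case below only pushes a target constant when the operand is a
// ConstantSDNode in range, e.g. "I" accepts 0..255 and "J" accepts 0..4095,
// mirroring the checks in getSingleConstraintMatchWeight() above.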
1696 if (Constraint.size() == 1) { 1697 switch (Constraint[0]) { 1698 case 'I': // Unsigned 8-bit constant 1699 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1700 if (isUInt<8>(C->getZExtValue())) 1701 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1702 Op.getValueType())); 1703 return; 1704 1705 case 'J': // Unsigned 12-bit constant 1706 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1707 if (isUInt<12>(C->getZExtValue())) 1708 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1709 Op.getValueType())); 1710 return; 1711 1712 case 'K': // Signed 16-bit constant 1713 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1714 if (isInt<16>(C->getSExtValue())) 1715 Ops.push_back(DAG.getSignedTargetConstant( 1716 C->getSExtValue(), SDLoc(Op), Op.getValueType())); 1717 return; 1718 1719 case 'L': // Signed 20-bit displacement (on all targets we support) 1720 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1721 if (isInt<20>(C->getSExtValue())) 1722 Ops.push_back(DAG.getSignedTargetConstant( 1723 C->getSExtValue(), SDLoc(Op), Op.getValueType())); 1724 return; 1725 1726 case 'M': // 0x7fffffff 1727 if (auto *C = dyn_cast<ConstantSDNode>(Op)) 1728 if (C->getZExtValue() == 0x7fffffff) 1729 Ops.push_back(DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), 1730 Op.getValueType())); 1731 return; 1732 } 1733 } 1734 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 1735 } 1736 1737 //===----------------------------------------------------------------------===// 1738 // Calling conventions 1739 //===----------------------------------------------------------------------===// 1740 1741 #include "SystemZGenCallingConv.inc" 1742 1743 const MCPhysReg *SystemZTargetLowering::getScratchRegisters( 1744 CallingConv::ID) const { 1745 static const MCPhysReg ScratchRegs[] = { SystemZ::R0D, SystemZ::R1D, 1746 SystemZ::R14D, 0 }; 1747 return ScratchRegs; 1748 } 1749 1750 bool SystemZTargetLowering::allowTruncateForTailCall(Type *FromType, 1751 Type *ToType) const { 1752 return isTruncateFree(FromType, ToType); 1753 } 1754 1755 bool SystemZTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { 1756 return CI->isTailCall(); 1757 } 1758 1759 // Value is a value that has been passed to us in the location described by VA 1760 // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining 1761 // any loads onto Chain. 1762 static SDValue convertLocVTToValVT(SelectionDAG &DAG, const SDLoc &DL, 1763 CCValAssign &VA, SDValue Chain, 1764 SDValue Value) { 1765 // If the argument has been promoted from a smaller type, insert an 1766 // assertion to capture this. 1767 if (VA.getLocInfo() == CCValAssign::SExt) 1768 Value = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Value, 1769 DAG.getValueType(VA.getValVT())); 1770 else if (VA.getLocInfo() == CCValAssign::ZExt) 1771 Value = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Value, 1772 DAG.getValueType(VA.getValVT())); 1773 1774 if (VA.isExtInLoc()) 1775 Value = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Value); 1776 else if (VA.getLocInfo() == CCValAssign::BCvt) { 1777 // If this is a short vector argument loaded from the stack, 1778 // extend from i64 to full vector size and then bitcast. 
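// (The incoming i64 becomes element 0 of a v2i64 whose upper element is
// undefined, and the bitcast then reinterprets that as the expected short
// vector type.)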
1779 assert(VA.getLocVT() == MVT::i64);
1780 assert(VA.getValVT().isVector());
1781 Value = DAG.getBuildVector(MVT::v2i64, DL, {Value, DAG.getUNDEF(MVT::i64)});
1782 Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
1783 } else
1784 assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
1785 return Value;
1786 }
1787 
1788 // Value is a value of type VA.getValVT() that we need to copy into
1789 // the location described by VA. Return a copy of Value converted to
1790 // VA.getLocVT(). The caller is responsible for handling indirect values.
1791 static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
1792 CCValAssign &VA, SDValue Value) {
1793 switch (VA.getLocInfo()) {
1794 case CCValAssign::SExt:
1795 return DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Value);
1796 case CCValAssign::ZExt:
1797 return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
1798 case CCValAssign::AExt:
1799 return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
1800 case CCValAssign::BCvt: {
1801 assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
1802 assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f32 ||
1803 VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::f128);
1804 // For an f32 vararg we need to first promote it to an f64 and then
1805 // bitcast it to an i64.
1806 if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i64)
1807 Value = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f64, Value);
1808 MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
1809 ? MVT::v2i64
1810 : VA.getLocVT();
1811 Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
1812 // For ELF, this is a short vector argument to be stored to the stack,
1813 // bitcast to v2i64 and then extract first element.
1814 if (BitCastToType == MVT::v2i64)
1815 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
1816 DAG.getConstant(0, DL, MVT::i32));
1817 return Value;
1818 }
1819 case CCValAssign::Full:
1820 return Value;
1821 default:
1822 llvm_unreachable("Unhandled getLocInfo()");
1823 }
1824 }
1825 
1826 static SDValue lowerI128ToGR128(SelectionDAG &DAG, SDValue In) {
1827 SDLoc DL(In);
1828 SDValue Lo, Hi;
1829 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) {
1830 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, In);
1831 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64,
1832 DAG.getNode(ISD::SRL, DL, MVT::i128, In,
1833 DAG.getConstant(64, DL, MVT::i32)));
1834 } else {
1835 std::tie(Lo, Hi) = DAG.SplitScalar(In, DL, MVT::i64, MVT::i64);
1836 }
1837 
1838 // FIXME: If v2i64 were a legal type, we could use it instead of
1839 // Untyped here. This might enable improved folding.
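// PAIR128 is a pseudo that assembles the two GR64 halves (the high half is
// passed first here) into a single untyped value representing a GR128
// register pair; lowerGR128ToI128 below performs the inverse.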
1840 SDNode *Pair = DAG.getMachineNode(SystemZ::PAIR128, DL, 1841 MVT::Untyped, Hi, Lo); 1842 return SDValue(Pair, 0); 1843 } 1844 1845 static SDValue lowerGR128ToI128(SelectionDAG &DAG, SDValue In) { 1846 SDLoc DL(In); 1847 SDValue Hi = DAG.getTargetExtractSubreg(SystemZ::subreg_h64, 1848 DL, MVT::i64, In); 1849 SDValue Lo = DAG.getTargetExtractSubreg(SystemZ::subreg_l64, 1850 DL, MVT::i64, In); 1851 1852 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) { 1853 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Lo); 1854 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i128, Hi); 1855 Hi = DAG.getNode(ISD::SHL, DL, MVT::i128, Hi, 1856 DAG.getConstant(64, DL, MVT::i32)); 1857 return DAG.getNode(ISD::OR, DL, MVT::i128, Lo, Hi); 1858 } else { 1859 return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i128, Lo, Hi); 1860 } 1861 } 1862 1863 bool SystemZTargetLowering::splitValueIntoRegisterParts( 1864 SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, 1865 unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC) const { 1866 EVT ValueVT = Val.getValueType(); 1867 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { 1868 // Inline assembly operand. 1869 Parts[0] = lowerI128ToGR128(DAG, DAG.getBitcast(MVT::i128, Val)); 1870 return true; 1871 } 1872 1873 return false; 1874 } 1875 1876 SDValue SystemZTargetLowering::joinRegisterPartsIntoValue( 1877 SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts, 1878 MVT PartVT, EVT ValueVT, std::optional<CallingConv::ID> CC) const { 1879 if (ValueVT.getSizeInBits() == 128 && NumParts == 1 && PartVT == MVT::Untyped) { 1880 // Inline assembly operand. 1881 SDValue Res = lowerGR128ToI128(DAG, Parts[0]); 1882 return DAG.getBitcast(ValueVT, Res); 1883 } 1884 1885 return SDValue(); 1886 } 1887 1888 SDValue SystemZTargetLowering::LowerFormalArguments( 1889 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, 1890 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL, 1891 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { 1892 MachineFunction &MF = DAG.getMachineFunction(); 1893 MachineFrameInfo &MFI = MF.getFrameInfo(); 1894 MachineRegisterInfo &MRI = MF.getRegInfo(); 1895 SystemZMachineFunctionInfo *FuncInfo = 1896 MF.getInfo<SystemZMachineFunctionInfo>(); 1897 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); 1898 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 1899 1900 // Assign locations to all of the incoming arguments. 1901 SmallVector<CCValAssign, 16> ArgLocs; 1902 SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); 1903 CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); 1904 FuncInfo->setSizeOfFnParams(CCInfo.getStackSize()); 1905 1906 unsigned NumFixedGPRs = 0; 1907 unsigned NumFixedFPRs = 0; 1908 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 1909 SDValue ArgValue; 1910 CCValAssign &VA = ArgLocs[I]; 1911 EVT LocVT = VA.getLocVT(); 1912 if (VA.isRegLoc()) { 1913 // Arguments passed in registers 1914 const TargetRegisterClass *RC; 1915 switch (LocVT.getSimpleVT().SimpleTy) { 1916 default: 1917 // Integers smaller than i64 should be promoted to i64. 
1918 llvm_unreachable("Unexpected argument type"); 1919 case MVT::i32: 1920 NumFixedGPRs += 1; 1921 RC = &SystemZ::GR32BitRegClass; 1922 break; 1923 case MVT::i64: 1924 NumFixedGPRs += 1; 1925 RC = &SystemZ::GR64BitRegClass; 1926 break; 1927 case MVT::f32: 1928 NumFixedFPRs += 1; 1929 RC = &SystemZ::FP32BitRegClass; 1930 break; 1931 case MVT::f64: 1932 NumFixedFPRs += 1; 1933 RC = &SystemZ::FP64BitRegClass; 1934 break; 1935 case MVT::f128: 1936 NumFixedFPRs += 2; 1937 RC = &SystemZ::FP128BitRegClass; 1938 break; 1939 case MVT::v16i8: 1940 case MVT::v8i16: 1941 case MVT::v4i32: 1942 case MVT::v2i64: 1943 case MVT::v4f32: 1944 case MVT::v2f64: 1945 RC = &SystemZ::VR128BitRegClass; 1946 break; 1947 } 1948 1949 Register VReg = MRI.createVirtualRegister(RC); 1950 MRI.addLiveIn(VA.getLocReg(), VReg); 1951 ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, LocVT); 1952 } else { 1953 assert(VA.isMemLoc() && "Argument not register or memory"); 1954 1955 // Create the frame index object for this incoming parameter. 1956 // FIXME: Pre-include call frame size in the offset, should not 1957 // need to manually add it here. 1958 int64_t ArgSPOffset = VA.getLocMemOffset(); 1959 if (Subtarget.isTargetXPLINK64()) { 1960 auto &XPRegs = 1961 Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); 1962 ArgSPOffset += XPRegs.getCallFrameSize(); 1963 } 1964 int FI = 1965 MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true); 1966 1967 // Create the SelectionDAG nodes corresponding to a load 1968 // from this parameter. Unpromoted ints and floats are 1969 // passed as right-justified 8-byte values. 1970 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 1971 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 1972 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, 1973 DAG.getIntPtrConstant(4, DL)); 1974 ArgValue = DAG.getLoad(LocVT, DL, Chain, FIN, 1975 MachinePointerInfo::getFixedStack(MF, FI)); 1976 } 1977 1978 // Convert the value of the argument register into the value that's 1979 // being passed. 1980 if (VA.getLocInfo() == CCValAssign::Indirect) { 1981 InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, 1982 MachinePointerInfo())); 1983 // If the original argument was split (e.g. i128), we need 1984 // to load all parts of it here (using the same address). 1985 unsigned ArgIndex = Ins[I].OrigArgIndex; 1986 assert (Ins[I].PartOffset == 0); 1987 while (I + 1 != E && Ins[I + 1].OrigArgIndex == ArgIndex) { 1988 CCValAssign &PartVA = ArgLocs[I + 1]; 1989 unsigned PartOffset = Ins[I + 1].PartOffset; 1990 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue, 1991 DAG.getIntPtrConstant(PartOffset, DL)); 1992 InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address, 1993 MachinePointerInfo())); 1994 ++I; 1995 } 1996 } else 1997 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue)); 1998 } 1999 2000 if (IsVarArg && Subtarget.isTargetXPLINK64()) { 2001 // Save the number of non-varargs registers for later use by va_start, etc. 2002 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs); 2003 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs); 2004 2005 auto *Regs = static_cast<SystemZXPLINK64Registers *>( 2006 Subtarget.getSpecialRegisters()); 2007 2008 // Likewise the address (in the form of a frame index) of where the 2009 // first stack vararg would be. The 1-byte size here is arbitrary. 2010 // FIXME: Pre-include call frame size in the offset, should not 2011 // need to manually add it here. 
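// The first stack vararg therefore sits just past the named arguments and
// the XPLINK call frame, which is what the offset below computes.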
2012 int64_t VarArgOffset = CCInfo.getStackSize() + Regs->getCallFrameSize();
2013 int FI = MFI.CreateFixedObject(1, VarArgOffset, true);
2014 FuncInfo->setVarArgsFrameIndex(FI);
2015 }
2016 
2017 if (IsVarArg && Subtarget.isTargetELF()) {
2018 // Save the number of non-varargs registers for later use by va_start, etc.
2019 FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
2020 FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
2021 
2022 // Likewise the address (in the form of a frame index) of where the
2023 // first stack vararg would be. The 1-byte size here is arbitrary.
2024 int64_t VarArgsOffset = CCInfo.getStackSize();
2025 FuncInfo->setVarArgsFrameIndex(
2026 MFI.CreateFixedObject(1, VarArgsOffset, true));
2027 
2028 // ...and a similar frame index for the caller-allocated save area
2029 // that will be used to store the incoming registers.
2030 int64_t RegSaveOffset =
2031 -SystemZMC::ELFCallFrameSize + TFL->getRegSpillOffset(MF, SystemZ::R2D) - 16;
2032 unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
2033 FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
2034 
2035 // Store the FPR varargs in the reserved frame slots. (We store the
2036 // GPRs as part of the prologue.)
2037 if (NumFixedFPRs < SystemZ::ELFNumArgFPRs && !useSoftFloat()) {
2038 SDValue MemOps[SystemZ::ELFNumArgFPRs];
2039 for (unsigned I = NumFixedFPRs; I < SystemZ::ELFNumArgFPRs; ++I) {
2040 unsigned Offset = TFL->getRegSpillOffset(MF, SystemZ::ELFArgFPRs[I]);
2041 int FI =
2042 MFI.CreateFixedObject(8, -SystemZMC::ELFCallFrameSize + Offset, true);
2043 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2044 Register VReg = MF.addLiveIn(SystemZ::ELFArgFPRs[I],
2045 &SystemZ::FP64BitRegClass);
2046 SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f64);
2047 MemOps[I] = DAG.getStore(ArgValue.getValue(1), DL, ArgValue, FIN,
2048 MachinePointerInfo::getFixedStack(MF, FI));
2049 }
2050 // Join the stores, which are independent of one another.
2051 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
2052 ArrayRef(&MemOps[NumFixedFPRs],
2053 SystemZ::ELFNumArgFPRs - NumFixedFPRs));
2054 }
2055 }
2056 
2057 if (Subtarget.isTargetXPLINK64()) {
2058 // Create virtual register for handling incoming "ADA" special register (R5)
2059 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass;
2060 Register ADAvReg = MRI.createVirtualRegister(RC);
2061 auto *Regs = static_cast<SystemZXPLINK64Registers *>(
2062 Subtarget.getSpecialRegisters());
2063 MRI.addLiveIn(Regs->getADARegister(), ADAvReg);
2064 FuncInfo->setADAVirtualRegister(ADAvReg);
2065 }
2066 return Chain;
2067 }
2068 
2069 static bool canUseSiblingCall(const CCState &ArgCCInfo,
2070 SmallVectorImpl<CCValAssign> &ArgLocs,
2071 SmallVectorImpl<ISD::OutputArg> &Outs) {
2072 // Punt if there are any indirect or stack arguments, or if the call
2073 // needs the callee-saved argument register R6, or if the call uses
2074 // the callee-saved register arguments SwiftSelf and SwiftError.
2075 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 2076 CCValAssign &VA = ArgLocs[I]; 2077 if (VA.getLocInfo() == CCValAssign::Indirect) 2078 return false; 2079 if (!VA.isRegLoc()) 2080 return false; 2081 Register Reg = VA.getLocReg(); 2082 if (Reg == SystemZ::R6H || Reg == SystemZ::R6L || Reg == SystemZ::R6D) 2083 return false; 2084 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftError()) 2085 return false; 2086 } 2087 return true; 2088 } 2089 2090 static SDValue getADAEntry(SelectionDAG &DAG, SDValue Val, SDLoc DL, 2091 unsigned Offset, bool LoadAdr = false) { 2092 MachineFunction &MF = DAG.getMachineFunction(); 2093 SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>(); 2094 unsigned ADAvReg = MFI->getADAVirtualRegister(); 2095 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 2096 2097 SDValue Reg = DAG.getRegister(ADAvReg, PtrVT); 2098 SDValue Ofs = DAG.getTargetConstant(Offset, DL, PtrVT); 2099 2100 SDValue Result = DAG.getNode(SystemZISD::ADA_ENTRY, DL, PtrVT, Val, Reg, Ofs); 2101 if (!LoadAdr) 2102 Result = DAG.getLoad( 2103 PtrVT, DL, DAG.getEntryNode(), Result, MachinePointerInfo(), Align(8), 2104 MachineMemOperand::MODereferenceable | MachineMemOperand::MOInvariant); 2105 2106 return Result; 2107 } 2108 2109 // ADA access using Global value 2110 // Note: for functions, address of descriptor is returned 2111 static SDValue getADAEntry(SelectionDAG &DAG, const GlobalValue *GV, SDLoc DL, 2112 EVT PtrVT) { 2113 unsigned ADAtype; 2114 bool LoadAddr = false; 2115 const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV); 2116 bool IsFunction = 2117 (isa<Function>(GV)) || (GA && isa<Function>(GA->getAliaseeObject())); 2118 bool IsInternal = (GV->hasInternalLinkage() || GV->hasPrivateLinkage()); 2119 2120 if (IsFunction) { 2121 if (IsInternal) { 2122 ADAtype = SystemZII::MO_ADA_DIRECT_FUNC_DESC; 2123 LoadAddr = true; 2124 } else 2125 ADAtype = SystemZII::MO_ADA_INDIRECT_FUNC_DESC; 2126 } else { 2127 ADAtype = SystemZII::MO_ADA_DATA_SYMBOL_ADDR; 2128 } 2129 SDValue Val = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ADAtype); 2130 2131 return getADAEntry(DAG, Val, DL, 0, LoadAddr); 2132 } 2133 2134 static bool getzOSCalleeAndADA(SelectionDAG &DAG, SDValue &Callee, SDValue &ADA, 2135 SDLoc &DL, SDValue &Chain) { 2136 unsigned ADADelta = 0; // ADA offset in desc. 2137 unsigned EPADelta = 8; // EPA offset in desc. 2138 MachineFunction &MF = DAG.getMachineFunction(); 2139 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); 2140 2141 // XPLink calling convention. 
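// The boolean result is true when the callee can be reached with a direct
// BRASL (an internal-linkage global); otherwise the ADA and entry point are
// loaded from the callee's function descriptor or computed from the
// function pointer.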
2142 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2143 bool IsInternal = (G->getGlobal()->hasInternalLinkage() || 2144 G->getGlobal()->hasPrivateLinkage()); 2145 if (IsInternal) { 2146 SystemZMachineFunctionInfo *MFI = 2147 MF.getInfo<SystemZMachineFunctionInfo>(); 2148 unsigned ADAvReg = MFI->getADAVirtualRegister(); 2149 ADA = DAG.getCopyFromReg(Chain, DL, ADAvReg, PtrVT); 2150 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 2151 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2152 return true; 2153 } else { 2154 SDValue GA = DAG.getTargetGlobalAddress( 2155 G->getGlobal(), DL, PtrVT, 0, SystemZII::MO_ADA_DIRECT_FUNC_DESC); 2156 ADA = getADAEntry(DAG, GA, DL, ADADelta); 2157 Callee = getADAEntry(DAG, GA, DL, EPADelta); 2158 } 2159 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2160 SDValue ES = DAG.getTargetExternalSymbol( 2161 E->getSymbol(), PtrVT, SystemZII::MO_ADA_DIRECT_FUNC_DESC); 2162 ADA = getADAEntry(DAG, ES, DL, ADADelta); 2163 Callee = getADAEntry(DAG, ES, DL, EPADelta); 2164 } else { 2165 // Function pointer case 2166 ADA = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, 2167 DAG.getConstant(ADADelta, DL, PtrVT)); 2168 ADA = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ADA, 2169 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 2170 Callee = DAG.getNode(ISD::ADD, DL, PtrVT, Callee, 2171 DAG.getConstant(EPADelta, DL, PtrVT)); 2172 Callee = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Callee, 2173 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 2174 } 2175 return false; 2176 } 2177 2178 SDValue 2179 SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, 2180 SmallVectorImpl<SDValue> &InVals) const { 2181 SelectionDAG &DAG = CLI.DAG; 2182 SDLoc &DL = CLI.DL; 2183 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 2184 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 2185 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 2186 SDValue Chain = CLI.Chain; 2187 SDValue Callee = CLI.Callee; 2188 bool &IsTailCall = CLI.IsTailCall; 2189 CallingConv::ID CallConv = CLI.CallConv; 2190 bool IsVarArg = CLI.IsVarArg; 2191 MachineFunction &MF = DAG.getMachineFunction(); 2192 EVT PtrVT = getPointerTy(MF.getDataLayout()); 2193 LLVMContext &Ctx = *DAG.getContext(); 2194 SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters(); 2195 2196 // FIXME: z/OS support to be added in later. 2197 if (Subtarget.isTargetXPLINK64()) 2198 IsTailCall = false; 2199 2200 // Integer args <=32 bits should have an extension attribute. 2201 verifyNarrowIntegerArgs_Call(Outs, &MF.getFunction(), Callee); 2202 2203 // Analyze the operands of the call, assigning locations to each operand. 2204 SmallVector<CCValAssign, 16> ArgLocs; 2205 SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx); 2206 ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); 2207 2208 // We don't support GuaranteedTailCallOpt, only automatically-detected 2209 // sibling calls. 2210 if (IsTailCall && !canUseSiblingCall(ArgCCInfo, ArgLocs, Outs)) 2211 IsTailCall = false; 2212 2213 // Get a count of how many bytes are to be pushed on the stack. 2214 unsigned NumBytes = ArgCCInfo.getStackSize(); 2215 2216 // Mark the start of the call. 2217 if (!IsTailCall) 2218 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL); 2219 2220 // Copy argument values to their designated locations. 
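// Register arguments are collected in RegsToPass and emitted later as glued
// CopyToReg nodes; stack arguments are stored into the outgoing argument
// area and joined through MemOpChains.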
2221 SmallVector<std::pair<unsigned, SDValue>, 9> RegsToPass; 2222 SmallVector<SDValue, 8> MemOpChains; 2223 SDValue StackPtr; 2224 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) { 2225 CCValAssign &VA = ArgLocs[I]; 2226 SDValue ArgValue = OutVals[I]; 2227 2228 if (VA.getLocInfo() == CCValAssign::Indirect) { 2229 // Store the argument in a stack slot and pass its address. 2230 unsigned ArgIndex = Outs[I].OrigArgIndex; 2231 EVT SlotVT; 2232 if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 2233 // Allocate the full stack space for a promoted (and split) argument. 2234 Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty; 2235 EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType); 2236 MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT); 2237 unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT); 2238 SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N); 2239 } else { 2240 SlotVT = Outs[I].VT; 2241 } 2242 SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT); 2243 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); 2244 MemOpChains.push_back( 2245 DAG.getStore(Chain, DL, ArgValue, SpillSlot, 2246 MachinePointerInfo::getFixedStack(MF, FI))); 2247 // If the original argument was split (e.g. i128), we need 2248 // to store all parts of it here (and pass just one address). 2249 assert (Outs[I].PartOffset == 0); 2250 while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) { 2251 SDValue PartValue = OutVals[I + 1]; 2252 unsigned PartOffset = Outs[I + 1].PartOffset; 2253 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot, 2254 DAG.getIntPtrConstant(PartOffset, DL)); 2255 MemOpChains.push_back( 2256 DAG.getStore(Chain, DL, PartValue, Address, 2257 MachinePointerInfo::getFixedStack(MF, FI))); 2258 assert((PartOffset + PartValue.getValueType().getStoreSize() <= 2259 SlotVT.getStoreSize()) && "Not enough space for argument part!"); 2260 ++I; 2261 } 2262 ArgValue = SpillSlot; 2263 } else 2264 ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue); 2265 2266 if (VA.isRegLoc()) { 2267 // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcasted to a 2268 // MVT::i128 type. We decompose the 128-bit type to a pair of its high 2269 // and low values. 2270 if (VA.getLocVT() == MVT::i128) 2271 ArgValue = lowerI128ToGR128(DAG, ArgValue); 2272 // Queue up the argument copies and emit them at the end. 2273 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue)); 2274 } else { 2275 assert(VA.isMemLoc() && "Argument not register or memory"); 2276 2277 // Work out the address of the stack slot. Unpromoted ints and 2278 // floats are passed as right-justified 8-byte values. 2279 if (!StackPtr.getNode()) 2280 StackPtr = DAG.getCopyFromReg(Chain, DL, 2281 Regs->getStackPointerRegister(), PtrVT); 2282 unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() + 2283 VA.getLocMemOffset(); 2284 if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32) 2285 Offset += 4; 2286 SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, 2287 DAG.getIntPtrConstant(Offset, DL)); 2288 2289 // Emit the store. 2290 MemOpChains.push_back( 2291 DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo())); 2292 2293 // Although long doubles or vectors are passed through the stack when 2294 // they are vararg (non-fixed arguments), if a long double or vector 2295 // occupies the third and fourth slot of the argument list GPR3 should 2296 // still shadow the third slot of the argument list. 
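// (The code below copies the appropriate 64-bit half of such an argument
// into R3 so that the XPLINK register/stack image stays consistent.)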
2297 if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) { 2298 SDValue ShadowArgValue = 2299 DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue, 2300 DAG.getIntPtrConstant(1, DL)); 2301 RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue)); 2302 } 2303 } 2304 } 2305 2306 // Join the stores, which are independent of one another. 2307 if (!MemOpChains.empty()) 2308 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains); 2309 2310 // Accept direct calls by converting symbolic call addresses to the 2311 // associated Target* opcodes. Force %r1 to be used for indirect 2312 // tail calls. 2313 SDValue Glue; 2314 2315 if (Subtarget.isTargetXPLINK64()) { 2316 SDValue ADA; 2317 bool IsBRASL = getzOSCalleeAndADA(DAG, Callee, ADA, DL, Chain); 2318 if (!IsBRASL) { 2319 unsigned CalleeReg = static_cast<SystemZXPLINK64Registers *>(Regs) 2320 ->getAddressOfCalleeRegister(); 2321 Chain = DAG.getCopyToReg(Chain, DL, CalleeReg, Callee, Glue); 2322 Glue = Chain.getValue(1); 2323 Callee = DAG.getRegister(CalleeReg, Callee.getValueType()); 2324 } 2325 RegsToPass.push_back(std::make_pair( 2326 static_cast<SystemZXPLINK64Registers *>(Regs)->getADARegister(), ADA)); 2327 } else { 2328 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 2329 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT); 2330 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2331 } else if (auto *E = dyn_cast<ExternalSymbolSDNode>(Callee)) { 2332 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT); 2333 Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee); 2334 } else if (IsTailCall) { 2335 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R1D, Callee, Glue); 2336 Glue = Chain.getValue(1); 2337 Callee = DAG.getRegister(SystemZ::R1D, Callee.getValueType()); 2338 } 2339 } 2340 2341 // Build a sequence of copy-to-reg nodes, chained and glued together. 2342 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) { 2343 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first, 2344 RegsToPass[I].second, Glue); 2345 Glue = Chain.getValue(1); 2346 } 2347 2348 // The first call operand is the chain and the second is the target address. 2349 SmallVector<SDValue, 8> Ops; 2350 Ops.push_back(Chain); 2351 Ops.push_back(Callee); 2352 2353 // Add argument registers to the end of the list so that they are 2354 // known live into the call. 2355 for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) 2356 Ops.push_back(DAG.getRegister(RegsToPass[I].first, 2357 RegsToPass[I].second.getValueType())); 2358 2359 // Add a register mask operand representing the call-preserved registers. 2360 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2361 const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv); 2362 assert(Mask && "Missing call preserved mask for calling convention"); 2363 Ops.push_back(DAG.getRegisterMask(Mask)); 2364 2365 // Glue the call to the argument copies, if any. 2366 if (Glue.getNode()) 2367 Ops.push_back(Glue); 2368 2369 // Emit the call. 2370 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 2371 if (IsTailCall) { 2372 SDValue Ret = DAG.getNode(SystemZISD::SIBCALL, DL, NodeTys, Ops); 2373 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge); 2374 return Ret; 2375 } 2376 Chain = DAG.getNode(SystemZISD::CALL, DL, NodeTys, Ops); 2377 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); 2378 Glue = Chain.getValue(1); 2379 2380 // Mark the end of the call, which is glued to the call itself. 
2381 Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); 2382 Glue = Chain.getValue(1); 2383 2384 // Assign locations to each value returned by this call. 2385 SmallVector<CCValAssign, 16> RetLocs; 2386 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx); 2387 RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); 2388 2389 // Copy all of the result registers out of their specified physreg. 2390 for (CCValAssign &VA : RetLocs) { 2391 // Copy the value out, gluing the copy to the end of the call sequence. 2392 SDValue RetValue = DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), 2393 VA.getLocVT(), Glue); 2394 Chain = RetValue.getValue(1); 2395 Glue = RetValue.getValue(2); 2396 2397 // Convert the value of the return register into the value that's 2398 // being returned. 2399 InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, RetValue)); 2400 } 2401 2402 return Chain; 2403 } 2404 2405 // Generate a call taking the given operands as arguments and returning a 2406 // result of type RetVT. 2407 std::pair<SDValue, SDValue> SystemZTargetLowering::makeExternalCall( 2408 SDValue Chain, SelectionDAG &DAG, const char *CalleeName, EVT RetVT, 2409 ArrayRef<SDValue> Ops, CallingConv::ID CallConv, bool IsSigned, SDLoc DL, 2410 bool DoesNotReturn, bool IsReturnValueUsed) const { 2411 TargetLowering::ArgListTy Args; 2412 Args.reserve(Ops.size()); 2413 2414 TargetLowering::ArgListEntry Entry; 2415 for (SDValue Op : Ops) { 2416 Entry.Node = Op; 2417 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); 2418 Entry.IsSExt = shouldSignExtendTypeInLibCall(Entry.Ty, IsSigned); 2419 Entry.IsZExt = !Entry.IsSExt; 2420 Args.push_back(Entry); 2421 } 2422 2423 SDValue Callee = 2424 DAG.getExternalSymbol(CalleeName, getPointerTy(DAG.getDataLayout())); 2425 2426 Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); 2427 TargetLowering::CallLoweringInfo CLI(DAG); 2428 bool SignExtend = shouldSignExtendTypeInLibCall(RetTy, IsSigned); 2429 CLI.setDebugLoc(DL) 2430 .setChain(Chain) 2431 .setCallee(CallConv, RetTy, Callee, std::move(Args)) 2432 .setNoReturn(DoesNotReturn) 2433 .setDiscardResult(!IsReturnValueUsed) 2434 .setSExtResult(SignExtend) 2435 .setZExtResult(!SignExtend); 2436 return LowerCallTo(CLI); 2437 } 2438 2439 bool SystemZTargetLowering:: 2440 CanLowerReturn(CallingConv::ID CallConv, 2441 MachineFunction &MF, bool isVarArg, 2442 const SmallVectorImpl<ISD::OutputArg> &Outs, 2443 LLVMContext &Context, 2444 const Type *RetTy) const { 2445 // Special case that we cannot easily detect in RetCC_SystemZ since 2446 // i128 may not be a legal type. 2447 for (auto &Out : Outs) 2448 if (Out.ArgVT == MVT::i128) 2449 return false; 2450 2451 SmallVector<CCValAssign, 16> RetLocs; 2452 CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); 2453 return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); 2454 } 2455 2456 SDValue 2457 SystemZTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, 2458 bool IsVarArg, 2459 const SmallVectorImpl<ISD::OutputArg> &Outs, 2460 const SmallVectorImpl<SDValue> &OutVals, 2461 const SDLoc &DL, SelectionDAG &DAG) const { 2462 MachineFunction &MF = DAG.getMachineFunction(); 2463 2464 // Integer args <=32 bits should have an extension attribute. 2465 verifyNarrowIntegerArgs_Ret(Outs, &MF.getFunction()); 2466 2467 // Assign locations to each returned value. 
2468 SmallVector<CCValAssign, 16> RetLocs; 2469 CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); 2470 RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); 2471 2472 // Quick exit for void returns 2473 if (RetLocs.empty()) 2474 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, Chain); 2475 2476 if (CallConv == CallingConv::GHC) 2477 report_fatal_error("GHC functions return void only"); 2478 2479 // Copy the result values into the output registers. 2480 SDValue Glue; 2481 SmallVector<SDValue, 4> RetOps; 2482 RetOps.push_back(Chain); 2483 for (unsigned I = 0, E = RetLocs.size(); I != E; ++I) { 2484 CCValAssign &VA = RetLocs[I]; 2485 SDValue RetValue = OutVals[I]; 2486 2487 // Make the return register live on exit. 2488 assert(VA.isRegLoc() && "Can only return in registers!"); 2489 2490 // Promote the value as required. 2491 RetValue = convertValVTToLocVT(DAG, DL, VA, RetValue); 2492 2493 // Chain and glue the copies together. 2494 Register Reg = VA.getLocReg(); 2495 Chain = DAG.getCopyToReg(Chain, DL, Reg, RetValue, Glue); 2496 Glue = Chain.getValue(1); 2497 RetOps.push_back(DAG.getRegister(Reg, VA.getLocVT())); 2498 } 2499 2500 // Update chain and glue. 2501 RetOps[0] = Chain; 2502 if (Glue.getNode()) 2503 RetOps.push_back(Glue); 2504 2505 return DAG.getNode(SystemZISD::RET_GLUE, DL, MVT::Other, RetOps); 2506 } 2507 2508 // Return true if Op is an intrinsic node with chain that returns the CC value 2509 // as its only (other) argument. Provide the associated SystemZISD opcode and 2510 // the mask of valid CC values if so. 2511 static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, 2512 unsigned &CCValid) { 2513 unsigned Id = Op.getConstantOperandVal(1); 2514 switch (Id) { 2515 case Intrinsic::s390_tbegin: 2516 Opcode = SystemZISD::TBEGIN; 2517 CCValid = SystemZ::CCMASK_TBEGIN; 2518 return true; 2519 2520 case Intrinsic::s390_tbegin_nofloat: 2521 Opcode = SystemZISD::TBEGIN_NOFLOAT; 2522 CCValid = SystemZ::CCMASK_TBEGIN; 2523 return true; 2524 2525 case Intrinsic::s390_tend: 2526 Opcode = SystemZISD::TEND; 2527 CCValid = SystemZ::CCMASK_TEND; 2528 return true; 2529 2530 default: 2531 return false; 2532 } 2533 } 2534 2535 // Return true if Op is an intrinsic node without chain that returns the 2536 // CC value as its final argument. Provide the associated SystemZISD 2537 // opcode and the mask of valid CC values if so. 
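// These are re-emitted below (see emitIntrinsicWithCC) as SystemZISD nodes
// carrying an explicit CC register result, so later lowering can branch or
// select on CC directly.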
2538 static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { 2539 unsigned Id = Op.getConstantOperandVal(0); 2540 switch (Id) { 2541 case Intrinsic::s390_vpkshs: 2542 case Intrinsic::s390_vpksfs: 2543 case Intrinsic::s390_vpksgs: 2544 Opcode = SystemZISD::PACKS_CC; 2545 CCValid = SystemZ::CCMASK_VCMP; 2546 return true; 2547 2548 case Intrinsic::s390_vpklshs: 2549 case Intrinsic::s390_vpklsfs: 2550 case Intrinsic::s390_vpklsgs: 2551 Opcode = SystemZISD::PACKLS_CC; 2552 CCValid = SystemZ::CCMASK_VCMP; 2553 return true; 2554 2555 case Intrinsic::s390_vceqbs: 2556 case Intrinsic::s390_vceqhs: 2557 case Intrinsic::s390_vceqfs: 2558 case Intrinsic::s390_vceqgs: 2559 case Intrinsic::s390_vceqqs: 2560 Opcode = SystemZISD::VICMPES; 2561 CCValid = SystemZ::CCMASK_VCMP; 2562 return true; 2563 2564 case Intrinsic::s390_vchbs: 2565 case Intrinsic::s390_vchhs: 2566 case Intrinsic::s390_vchfs: 2567 case Intrinsic::s390_vchgs: 2568 case Intrinsic::s390_vchqs: 2569 Opcode = SystemZISD::VICMPHS; 2570 CCValid = SystemZ::CCMASK_VCMP; 2571 return true; 2572 2573 case Intrinsic::s390_vchlbs: 2574 case Intrinsic::s390_vchlhs: 2575 case Intrinsic::s390_vchlfs: 2576 case Intrinsic::s390_vchlgs: 2577 case Intrinsic::s390_vchlqs: 2578 Opcode = SystemZISD::VICMPHLS; 2579 CCValid = SystemZ::CCMASK_VCMP; 2580 return true; 2581 2582 case Intrinsic::s390_vtm: 2583 Opcode = SystemZISD::VTM; 2584 CCValid = SystemZ::CCMASK_VCMP; 2585 return true; 2586 2587 case Intrinsic::s390_vfaebs: 2588 case Intrinsic::s390_vfaehs: 2589 case Intrinsic::s390_vfaefs: 2590 Opcode = SystemZISD::VFAE_CC; 2591 CCValid = SystemZ::CCMASK_ANY; 2592 return true; 2593 2594 case Intrinsic::s390_vfaezbs: 2595 case Intrinsic::s390_vfaezhs: 2596 case Intrinsic::s390_vfaezfs: 2597 Opcode = SystemZISD::VFAEZ_CC; 2598 CCValid = SystemZ::CCMASK_ANY; 2599 return true; 2600 2601 case Intrinsic::s390_vfeebs: 2602 case Intrinsic::s390_vfeehs: 2603 case Intrinsic::s390_vfeefs: 2604 Opcode = SystemZISD::VFEE_CC; 2605 CCValid = SystemZ::CCMASK_ANY; 2606 return true; 2607 2608 case Intrinsic::s390_vfeezbs: 2609 case Intrinsic::s390_vfeezhs: 2610 case Intrinsic::s390_vfeezfs: 2611 Opcode = SystemZISD::VFEEZ_CC; 2612 CCValid = SystemZ::CCMASK_ANY; 2613 return true; 2614 2615 case Intrinsic::s390_vfenebs: 2616 case Intrinsic::s390_vfenehs: 2617 case Intrinsic::s390_vfenefs: 2618 Opcode = SystemZISD::VFENE_CC; 2619 CCValid = SystemZ::CCMASK_ANY; 2620 return true; 2621 2622 case Intrinsic::s390_vfenezbs: 2623 case Intrinsic::s390_vfenezhs: 2624 case Intrinsic::s390_vfenezfs: 2625 Opcode = SystemZISD::VFENEZ_CC; 2626 CCValid = SystemZ::CCMASK_ANY; 2627 return true; 2628 2629 case Intrinsic::s390_vistrbs: 2630 case Intrinsic::s390_vistrhs: 2631 case Intrinsic::s390_vistrfs: 2632 Opcode = SystemZISD::VISTR_CC; 2633 CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; 2634 return true; 2635 2636 case Intrinsic::s390_vstrcbs: 2637 case Intrinsic::s390_vstrchs: 2638 case Intrinsic::s390_vstrcfs: 2639 Opcode = SystemZISD::VSTRC_CC; 2640 CCValid = SystemZ::CCMASK_ANY; 2641 return true; 2642 2643 case Intrinsic::s390_vstrczbs: 2644 case Intrinsic::s390_vstrczhs: 2645 case Intrinsic::s390_vstrczfs: 2646 Opcode = SystemZISD::VSTRCZ_CC; 2647 CCValid = SystemZ::CCMASK_ANY; 2648 return true; 2649 2650 case Intrinsic::s390_vstrsb: 2651 case Intrinsic::s390_vstrsh: 2652 case Intrinsic::s390_vstrsf: 2653 Opcode = SystemZISD::VSTRS_CC; 2654 CCValid = SystemZ::CCMASK_ANY; 2655 return true; 2656 2657 case Intrinsic::s390_vstrszb: 2658 case Intrinsic::s390_vstrszh: 
2659 case Intrinsic::s390_vstrszf: 2660 Opcode = SystemZISD::VSTRSZ_CC; 2661 CCValid = SystemZ::CCMASK_ANY; 2662 return true; 2663 2664 case Intrinsic::s390_vfcedbs: 2665 case Intrinsic::s390_vfcesbs: 2666 Opcode = SystemZISD::VFCMPES; 2667 CCValid = SystemZ::CCMASK_VCMP; 2668 return true; 2669 2670 case Intrinsic::s390_vfchdbs: 2671 case Intrinsic::s390_vfchsbs: 2672 Opcode = SystemZISD::VFCMPHS; 2673 CCValid = SystemZ::CCMASK_VCMP; 2674 return true; 2675 2676 case Intrinsic::s390_vfchedbs: 2677 case Intrinsic::s390_vfchesbs: 2678 Opcode = SystemZISD::VFCMPHES; 2679 CCValid = SystemZ::CCMASK_VCMP; 2680 return true; 2681 2682 case Intrinsic::s390_vftcidb: 2683 case Intrinsic::s390_vftcisb: 2684 Opcode = SystemZISD::VFTCI; 2685 CCValid = SystemZ::CCMASK_VCMP; 2686 return true; 2687 2688 case Intrinsic::s390_tdc: 2689 Opcode = SystemZISD::TDC; 2690 CCValid = SystemZ::CCMASK_TDC; 2691 return true; 2692 2693 default: 2694 return false; 2695 } 2696 } 2697 2698 // Emit an intrinsic with chain and an explicit CC register result. 2699 static SDNode *emitIntrinsicWithCCAndChain(SelectionDAG &DAG, SDValue Op, 2700 unsigned Opcode) { 2701 // Copy all operands except the intrinsic ID. 2702 unsigned NumOps = Op.getNumOperands(); 2703 SmallVector<SDValue, 6> Ops; 2704 Ops.reserve(NumOps - 1); 2705 Ops.push_back(Op.getOperand(0)); 2706 for (unsigned I = 2; I < NumOps; ++I) 2707 Ops.push_back(Op.getOperand(I)); 2708 2709 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 2710 SDVTList RawVTs = DAG.getVTList(MVT::i32, MVT::Other); 2711 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); 2712 SDValue OldChain = SDValue(Op.getNode(), 1); 2713 SDValue NewChain = SDValue(Intr.getNode(), 1); 2714 DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); 2715 return Intr.getNode(); 2716 } 2717 2718 // Emit an intrinsic with an explicit CC register result. 2719 static SDNode *emitIntrinsicWithCC(SelectionDAG &DAG, SDValue Op, 2720 unsigned Opcode) { 2721 // Copy all operands except the intrinsic ID. 2722 unsigned NumOps = Op.getNumOperands(); 2723 SmallVector<SDValue, 6> Ops; 2724 Ops.reserve(NumOps - 1); 2725 for (unsigned I = 1; I < NumOps; ++I) 2726 Ops.push_back(Op.getOperand(I)); 2727 2728 SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), Op->getVTList(), Ops); 2729 return Intr.getNode(); 2730 } 2731 2732 // CC is a comparison that will be implemented using an integer or 2733 // floating-point comparison. Return the condition code mask for 2734 // a branch on true. In the integer case, CCMASK_CMP_UO is set for 2735 // unsigned comparisons and clear for signed ones. In the floating-point 2736 // case, CCMASK_CMP_UO has its normal mask meaning (unordered). 2737 static unsigned CCMaskForCondCode(ISD::CondCode CC) { 2738 #define CONV(X) \ 2739 case ISD::SET##X: return SystemZ::CCMASK_CMP_##X; \ 2740 case ISD::SETO##X: return SystemZ::CCMASK_CMP_##X; \ 2741 case ISD::SETU##X: return SystemZ::CCMASK_CMP_UO | SystemZ::CCMASK_CMP_##X 2742 2743 switch (CC) { 2744 default: 2745 llvm_unreachable("Invalid integer condition!"); 2746 2747 CONV(EQ); 2748 CONV(NE); 2749 CONV(GT); 2750 CONV(GE); 2751 CONV(LT); 2752 CONV(LE); 2753 2754 case ISD::SETO: return SystemZ::CCMASK_CMP_O; 2755 case ISD::SETUO: return SystemZ::CCMASK_CMP_UO; 2756 } 2757 #undef CONV 2758 } 2759 2760 // If C can be converted to a comparison against zero, adjust the operands 2761 // as necessary. 
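// For example, (X > -1) becomes (X >= 0) and (X < 1) becomes (X <= 0),
// turning the comparison into one against zero.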
2762 static void adjustZeroCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) {
2763 if (C.ICmpType == SystemZICMP::UnsignedOnly)
2764 return;
2765 
2766 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1.getNode());
2767 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2768 return;
2769 
2770 int64_t Value = ConstOp1->getSExtValue();
2771 if ((Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_GT) ||
2772 (Value == -1 && C.CCMask == SystemZ::CCMASK_CMP_LE) ||
2773 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_LT) ||
2774 (Value == 1 && C.CCMask == SystemZ::CCMASK_CMP_GE)) {
2775 C.CCMask ^= SystemZ::CCMASK_CMP_EQ;
2776 C.Op1 = DAG.getConstant(0, DL, C.Op1.getValueType());
2777 }
2778 }
2779 
2780 // If a comparison described by C is suitable for CLI(Y), CHHSI or CLHHSI,
2781 // adjust the operands as necessary.
2782 static void adjustSubwordCmp(SelectionDAG &DAG, const SDLoc &DL,
2783 Comparison &C) {
2784 // For us to make any changes, it must be a comparison between a single-use
2785 // load and a constant.
2786 if (!C.Op0.hasOneUse() ||
2787 C.Op0.getOpcode() != ISD::LOAD ||
2788 C.Op1.getOpcode() != ISD::Constant)
2789 return;
2790 
2791 // We must have an 8- or 16-bit load.
2792 auto *Load = cast<LoadSDNode>(C.Op0);
2793 unsigned NumBits = Load->getMemoryVT().getSizeInBits();
2794 if ((NumBits != 8 && NumBits != 16) ||
2795 NumBits != Load->getMemoryVT().getStoreSizeInBits())
2796 return;
2797 
2798 // The load must be an extending one and the constant must be within the
2799 // range of the unextended value.
2800 auto *ConstOp1 = cast<ConstantSDNode>(C.Op1);
2801 if (!ConstOp1 || ConstOp1->getValueSizeInBits(0) > 64)
2802 return;
2803 uint64_t Value = ConstOp1->getZExtValue();
2804 uint64_t Mask = (1 << NumBits) - 1;
2805 if (Load->getExtensionType() == ISD::SEXTLOAD) {
2806 // Make sure that ConstOp1 is in range of C.Op0.
2807 int64_t SignedValue = ConstOp1->getSExtValue();
2808 if (uint64_t(SignedValue) + (uint64_t(1) << (NumBits - 1)) > Mask)
2809 return;
2810 if (C.ICmpType != SystemZICMP::SignedOnly) {
2811 // Unsigned comparison between two sign-extended values is equivalent
2812 // to unsigned comparison between two zero-extended values.
2813 Value &= Mask;
2814 } else if (NumBits == 8) {
2815 // Try to treat the comparison as unsigned, so that we can use CLI.
2816 // Adjust CCMask and Value as necessary.
2817 if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_LT)
2818 // Test whether the high bit of the byte is set.
2819 Value = 127, C.CCMask = SystemZ::CCMASK_CMP_GT;
2820 else if (Value == 0 && C.CCMask == SystemZ::CCMASK_CMP_GE)
2821 // Test whether the high bit of the byte is clear.
2822 Value = 128, C.CCMask = SystemZ::CCMASK_CMP_LT;
2823 else
2824 // No instruction exists for this combination.
2825 return;
2826 C.ICmpType = SystemZICMP::UnsignedOnly;
2827 }
2828 } else if (Load->getExtensionType() == ISD::ZEXTLOAD) {
2829 if (Value > Mask)
2830 return;
2831 // If the constant is in range, we can use any comparison.
2832 C.ICmpType = SystemZICMP::Any;
2833 } else
2834 return;
2835 
2836 // Make sure that the first operand is an i32 of the right extension type.
2837 ISD::LoadExtType ExtType = (C.ICmpType == SystemZICMP::SignedOnly ?
2838 ISD::SEXTLOAD : 2839 ISD::ZEXTLOAD); 2840 if (C.Op0.getValueType() != MVT::i32 || 2841 Load->getExtensionType() != ExtType) { 2842 C.Op0 = DAG.getExtLoad(ExtType, SDLoc(Load), MVT::i32, Load->getChain(), 2843 Load->getBasePtr(), Load->getPointerInfo(), 2844 Load->getMemoryVT(), Load->getAlign(), 2845 Load->getMemOperand()->getFlags()); 2846 // Update the chain uses. 2847 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), C.Op0.getValue(1)); 2848 } 2849 2850 // Make sure that the second operand is an i32 with the right value. 2851 if (C.Op1.getValueType() != MVT::i32 || 2852 Value != ConstOp1->getZExtValue()) 2853 C.Op1 = DAG.getConstant((uint32_t)Value, DL, MVT::i32); 2854 } 2855 2856 // Return true if Op is either an unextended load, or a load suitable 2857 // for integer register-memory comparisons of type ICmpType. 2858 static bool isNaturalMemoryOperand(SDValue Op, unsigned ICmpType) { 2859 auto *Load = dyn_cast<LoadSDNode>(Op.getNode()); 2860 if (Load) { 2861 // There are no instructions to compare a register with a memory byte. 2862 if (Load->getMemoryVT() == MVT::i8) 2863 return false; 2864 // Otherwise decide on extension type. 2865 switch (Load->getExtensionType()) { 2866 case ISD::NON_EXTLOAD: 2867 return true; 2868 case ISD::SEXTLOAD: 2869 return ICmpType != SystemZICMP::UnsignedOnly; 2870 case ISD::ZEXTLOAD: 2871 return ICmpType != SystemZICMP::SignedOnly; 2872 default: 2873 break; 2874 } 2875 } 2876 return false; 2877 } 2878 2879 // Return true if it is better to swap the operands of C. 2880 static bool shouldSwapCmpOperands(const Comparison &C) { 2881 // Leave i128 and f128 comparisons alone, since they have no memory forms. 2882 if (C.Op0.getValueType() == MVT::i128) 2883 return false; 2884 if (C.Op0.getValueType() == MVT::f128) 2885 return false; 2886 2887 // Always keep a floating-point constant second, since comparisons with 2888 // zero can use LOAD TEST and comparisons with other constants make a 2889 // natural memory operand. 2890 if (isa<ConstantFPSDNode>(C.Op1)) 2891 return false; 2892 2893 // Never swap comparisons with zero since there are many ways to optimize 2894 // those later. 2895 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 2896 if (ConstOp1 && ConstOp1->getZExtValue() == 0) 2897 return false; 2898 2899 // Also keep natural memory operands second if the loaded value is 2900 // only used here. Several comparisons have memory forms. 2901 if (isNaturalMemoryOperand(C.Op1, C.ICmpType) && C.Op1.hasOneUse()) 2902 return false; 2903 2904 // Look for cases where Cmp0 is a single-use load and Cmp1 isn't. 2905 // In that case we generally prefer the memory to be second. 2906 if (isNaturalMemoryOperand(C.Op0, C.ICmpType) && C.Op0.hasOneUse()) { 2907 // The only exceptions are when the second operand is a constant and 2908 // we can use things like CHHSI. 2909 if (!ConstOp1) 2910 return true; 2911 // The unsigned memory-immediate instructions can handle 16-bit 2912 // unsigned integers. 2913 if (C.ICmpType != SystemZICMP::SignedOnly && 2914 isUInt<16>(ConstOp1->getZExtValue())) 2915 return false; 2916 // The signed memory-immediate instructions can handle 16-bit 2917 // signed integers. 2918 if (C.ICmpType != SystemZICMP::UnsignedOnly && 2919 isInt<16>(ConstOp1->getSExtValue())) 2920 return false; 2921 return true; 2922 } 2923 2924 // Try to promote the use of CGFR and CLGFR. 
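// CGFR/CLGFR compare a 64-bit register with a sign-/zero-extended 32-bit
// register in their second operand, so it is preferable to swap and make
// the extension the second comparison operand.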
2925 unsigned Opcode0 = C.Op0.getOpcode(); 2926 if (C.ICmpType != SystemZICMP::UnsignedOnly && Opcode0 == ISD::SIGN_EXTEND) 2927 return true; 2928 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::ZERO_EXTEND) 2929 return true; 2930 if (C.ICmpType != SystemZICMP::SignedOnly && Opcode0 == ISD::AND && 2931 C.Op0.getOperand(1).getOpcode() == ISD::Constant && 2932 C.Op0.getConstantOperandVal(1) == 0xffffffff) 2933 return true; 2934 2935 return false; 2936 } 2937 2938 // Check whether C tests for equality between X and Y and whether X - Y 2939 // or Y - X is also computed. In that case it's better to compare the 2940 // result of the subtraction against zero. 2941 static void adjustForSubtraction(SelectionDAG &DAG, const SDLoc &DL, 2942 Comparison &C) { 2943 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 2944 C.CCMask == SystemZ::CCMASK_CMP_NE) { 2945 for (SDNode *N : C.Op0->users()) { 2946 if (N->getOpcode() == ISD::SUB && 2947 ((N->getOperand(0) == C.Op0 && N->getOperand(1) == C.Op1) || 2948 (N->getOperand(0) == C.Op1 && N->getOperand(1) == C.Op0))) { 2949 // Disable the nsw and nuw flags: the backend needs to handle 2950 // overflow as well during comparison elimination. 2951 N->dropFlags(SDNodeFlags::NoWrap); 2952 C.Op0 = SDValue(N, 0); 2953 C.Op1 = DAG.getConstant(0, DL, N->getValueType(0)); 2954 return; 2955 } 2956 } 2957 } 2958 } 2959 2960 // Check whether C compares a floating-point value with zero and if that 2961 // floating-point value is also negated. In this case we can use the 2962 // negation to set CC, so avoiding separate LOAD AND TEST and 2963 // LOAD (NEGATIVE/COMPLEMENT) instructions. 2964 static void adjustForFNeg(Comparison &C) { 2965 // This optimization is invalid for strict comparisons, since FNEG 2966 // does not raise any exceptions. 2967 if (C.Chain) 2968 return; 2969 auto *C1 = dyn_cast<ConstantFPSDNode>(C.Op1); 2970 if (C1 && C1->isZero()) { 2971 for (SDNode *N : C.Op0->users()) { 2972 if (N->getOpcode() == ISD::FNEG) { 2973 C.Op0 = SDValue(N, 0); 2974 C.CCMask = SystemZ::reverseCCMask(C.CCMask); 2975 return; 2976 } 2977 } 2978 } 2979 } 2980 2981 // Check whether C compares (shl X, 32) with 0 and whether X is 2982 // also sign-extended. In that case it is better to test the result 2983 // of the sign extension using LTGFR. 2984 // 2985 // This case is important because InstCombine transforms a comparison 2986 // with (sext (trunc X)) into a comparison with (shl X, 32). 2987 static void adjustForLTGFR(Comparison &C) { 2988 // Check for a comparison between (shl X, 32) and 0. 2989 if (C.Op0.getOpcode() == ISD::SHL && C.Op0.getValueType() == MVT::i64 && 2990 C.Op1.getOpcode() == ISD::Constant && C.Op1->getAsZExtVal() == 0) { 2991 auto *C1 = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 2992 if (C1 && C1->getZExtValue() == 32) { 2993 SDValue ShlOp0 = C.Op0.getOperand(0); 2994 // See whether X has any SIGN_EXTEND_INREG uses. 2995 for (SDNode *N : ShlOp0->users()) { 2996 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG && 2997 cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32) { 2998 C.Op0 = SDValue(N, 0); 2999 return; 3000 } 3001 } 3002 } 3003 } 3004 } 3005 3006 // If C compares the truncation of an extending load, try to compare 3007 // the untruncated value instead. This exposes more opportunities to 3008 // reuse CC. 
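// For instance, comparing (trunc (extload X)) against zero can instead
// compare the untruncated load result against zero, provided the loaded
// width fits within the truncated type and the extension kind matches the
// comparison's signedness.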
3009 static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL, 3010 Comparison &C) { 3011 if (C.Op0.getOpcode() == ISD::TRUNCATE && 3012 C.Op0.getOperand(0).getOpcode() == ISD::LOAD && 3013 C.Op1.getOpcode() == ISD::Constant && 3014 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && 3015 C.Op1->getAsZExtVal() == 0) { 3016 auto *L = cast<LoadSDNode>(C.Op0.getOperand(0)); 3017 if (L->getMemoryVT().getStoreSizeInBits().getFixedValue() <= 3018 C.Op0.getValueSizeInBits().getFixedValue()) { 3019 unsigned Type = L->getExtensionType(); 3020 if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) || 3021 (Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) { 3022 C.Op0 = C.Op0.getOperand(0); 3023 C.Op1 = DAG.getConstant(0, DL, C.Op0.getValueType()); 3024 } 3025 } 3026 } 3027 } 3028 3029 // Return true if shift operation N has an in-range constant shift value. 3030 // Store it in ShiftVal if so. 3031 static bool isSimpleShift(SDValue N, unsigned &ShiftVal) { 3032 auto *Shift = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3033 if (!Shift) 3034 return false; 3035 3036 uint64_t Amount = Shift->getZExtValue(); 3037 if (Amount >= N.getValueSizeInBits()) 3038 return false; 3039 3040 ShiftVal = Amount; 3041 return true; 3042 } 3043 3044 // Check whether an AND with Mask is suitable for a TEST UNDER MASK 3045 // instruction and whether the CC value is descriptive enough to handle 3046 // a comparison of type Opcode between the AND result and CmpVal. 3047 // CCMask says which comparison result is being tested and BitSize is 3048 // the number of bits in the operands. If TEST UNDER MASK can be used, 3049 // return the corresponding CC mask, otherwise return 0. 3050 static unsigned getTestUnderMaskCond(unsigned BitSize, unsigned CCMask, 3051 uint64_t Mask, uint64_t CmpVal, 3052 unsigned ICmpType) { 3053 assert(Mask != 0 && "ANDs with zero should have been removed by now"); 3054 3055 // Check whether the mask is suitable for TMHH, TMHL, TMLH or TMLL. 3056 if (!SystemZ::isImmLL(Mask) && !SystemZ::isImmLH(Mask) && 3057 !SystemZ::isImmHL(Mask) && !SystemZ::isImmHH(Mask)) 3058 return 0; 3059 3060 // Work out the masks for the lowest and highest bits. 3061 uint64_t High = llvm::bit_floor(Mask); 3062 uint64_t Low = uint64_t(1) << llvm::countr_zero(Mask); 3063 3064 // Signed ordered comparisons are effectively unsigned if the sign 3065 // bit is dropped. 3066 bool EffectivelyUnsigned = (ICmpType != SystemZICMP::SignedOnly); 3067 3068 // Check for equality comparisons with 0, or the equivalent. 3069 if (CmpVal == 0) { 3070 if (CCMask == SystemZ::CCMASK_CMP_EQ) 3071 return SystemZ::CCMASK_TM_ALL_0; 3072 if (CCMask == SystemZ::CCMASK_CMP_NE) 3073 return SystemZ::CCMASK_TM_SOME_1; 3074 } 3075 if (EffectivelyUnsigned && CmpVal > 0 && CmpVal <= Low) { 3076 if (CCMask == SystemZ::CCMASK_CMP_LT) 3077 return SystemZ::CCMASK_TM_ALL_0; 3078 if (CCMask == SystemZ::CCMASK_CMP_GE) 3079 return SystemZ::CCMASK_TM_SOME_1; 3080 } 3081 if (EffectivelyUnsigned && CmpVal < Low) { 3082 if (CCMask == SystemZ::CCMASK_CMP_LE) 3083 return SystemZ::CCMASK_TM_ALL_0; 3084 if (CCMask == SystemZ::CCMASK_CMP_GT) 3085 return SystemZ::CCMASK_TM_SOME_1; 3086 } 3087 3088 // Check for equality comparisons with the mask, or the equivalent. 
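  // For example, with Mask == 0x00f0, (X & Mask) == Mask holds exactly when
  // all four selected bits are 1, which TEST UNDER MASK reports as
  // CCMASK_TM_ALL_1.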
3089 if (CmpVal == Mask) { 3090 if (CCMask == SystemZ::CCMASK_CMP_EQ) 3091 return SystemZ::CCMASK_TM_ALL_1; 3092 if (CCMask == SystemZ::CCMASK_CMP_NE) 3093 return SystemZ::CCMASK_TM_SOME_0; 3094 } 3095 if (EffectivelyUnsigned && CmpVal >= Mask - Low && CmpVal < Mask) { 3096 if (CCMask == SystemZ::CCMASK_CMP_GT) 3097 return SystemZ::CCMASK_TM_ALL_1; 3098 if (CCMask == SystemZ::CCMASK_CMP_LE) 3099 return SystemZ::CCMASK_TM_SOME_0; 3100 } 3101 if (EffectivelyUnsigned && CmpVal > Mask - Low && CmpVal <= Mask) { 3102 if (CCMask == SystemZ::CCMASK_CMP_GE) 3103 return SystemZ::CCMASK_TM_ALL_1; 3104 if (CCMask == SystemZ::CCMASK_CMP_LT) 3105 return SystemZ::CCMASK_TM_SOME_0; 3106 } 3107 3108 // Check for ordered comparisons with the top bit. 3109 if (EffectivelyUnsigned && CmpVal >= Mask - High && CmpVal < High) { 3110 if (CCMask == SystemZ::CCMASK_CMP_LE) 3111 return SystemZ::CCMASK_TM_MSB_0; 3112 if (CCMask == SystemZ::CCMASK_CMP_GT) 3113 return SystemZ::CCMASK_TM_MSB_1; 3114 } 3115 if (EffectivelyUnsigned && CmpVal > Mask - High && CmpVal <= High) { 3116 if (CCMask == SystemZ::CCMASK_CMP_LT) 3117 return SystemZ::CCMASK_TM_MSB_0; 3118 if (CCMask == SystemZ::CCMASK_CMP_GE) 3119 return SystemZ::CCMASK_TM_MSB_1; 3120 } 3121 3122 // If there are just two bits, we can do equality checks for Low and High 3123 // as well. 3124 if (Mask == Low + High) { 3125 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == Low) 3126 return SystemZ::CCMASK_TM_MIXED_MSB_0; 3127 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == Low) 3128 return SystemZ::CCMASK_TM_MIXED_MSB_0 ^ SystemZ::CCMASK_ANY; 3129 if (CCMask == SystemZ::CCMASK_CMP_EQ && CmpVal == High) 3130 return SystemZ::CCMASK_TM_MIXED_MSB_1; 3131 if (CCMask == SystemZ::CCMASK_CMP_NE && CmpVal == High) 3132 return SystemZ::CCMASK_TM_MIXED_MSB_1 ^ SystemZ::CCMASK_ANY; 3133 } 3134 3135 // Looks like we've exhausted our options. 3136 return 0; 3137 } 3138 3139 // See whether C can be implemented as a TEST UNDER MASK instruction. 3140 // Update the arguments with the TM version if so. 3141 static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL, 3142 Comparison &C) { 3143 // Use VECTOR TEST UNDER MASK for i128 operations. 3144 if (C.Op0.getValueType() == MVT::i128) { 3145 // We can use VTM for EQ/NE comparisons of x & y against 0. 3146 if (C.Op0.getOpcode() == ISD::AND && 3147 (C.CCMask == SystemZ::CCMASK_CMP_EQ || 3148 C.CCMask == SystemZ::CCMASK_CMP_NE)) { 3149 auto *Mask = dyn_cast<ConstantSDNode>(C.Op1); 3150 if (Mask && Mask->getAPIntValue() == 0) { 3151 C.Opcode = SystemZISD::VTM; 3152 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(1)); 3153 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, C.Op0.getOperand(0)); 3154 C.CCValid = SystemZ::CCMASK_VCMP; 3155 if (C.CCMask == SystemZ::CCMASK_CMP_EQ) 3156 C.CCMask = SystemZ::CCMASK_VCMP_ALL; 3157 else 3158 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; 3159 } 3160 } 3161 return; 3162 } 3163 3164 // Check that we have a comparison with a constant. 3165 auto *ConstOp1 = dyn_cast<ConstantSDNode>(C.Op1); 3166 if (!ConstOp1) 3167 return; 3168 uint64_t CmpVal = ConstOp1->getZExtValue(); 3169 3170 // Check whether the nonconstant input is an AND with a constant mask. 
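  // If it is not, the else branch below synthesizes a mask that drops the
  // bits of Op0 below the lowest set bit of CmpVal, so that an unsigned
  // ordered comparison with an i64 immediate can still be turned into a
  // TEST UNDER MASK.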
  Comparison NewC(C);
  uint64_t MaskVal;
  ConstantSDNode *Mask = nullptr;
  if (C.Op0.getOpcode() == ISD::AND) {
    NewC.Op0 = C.Op0.getOperand(0);
    NewC.Op1 = C.Op0.getOperand(1);
    Mask = dyn_cast<ConstantSDNode>(NewC.Op1);
    if (!Mask)
      return;
    MaskVal = Mask->getZExtValue();
  } else {
    // There is no instruction to compare with a 64-bit immediate,
    // so use TMHH instead if possible. We need an unsigned ordered
    // comparison with an i64 immediate.
    if (NewC.Op0.getValueType() != MVT::i64 ||
        NewC.CCMask == SystemZ::CCMASK_CMP_EQ ||
        NewC.CCMask == SystemZ::CCMASK_CMP_NE ||
        NewC.ICmpType == SystemZICMP::SignedOnly)
      return;
    // Convert LE and GT comparisons into LT and GE.
    if (NewC.CCMask == SystemZ::CCMASK_CMP_LE ||
        NewC.CCMask == SystemZ::CCMASK_CMP_GT) {
      if (CmpVal == uint64_t(-1))
        return;
      CmpVal += 1;
      NewC.CCMask ^= SystemZ::CCMASK_CMP_EQ;
    }
    // If the low N bits of Op1 are zero then the low N bits of Op0 can
    // be masked off without changing the result.
    MaskVal = -(CmpVal & -CmpVal);
    NewC.ICmpType = SystemZICMP::UnsignedOnly;
  }
  if (!MaskVal)
    return;

  // Check whether the combination of mask, comparison value and comparison
  // type is suitable.
  unsigned BitSize = NewC.Op0.getValueSizeInBits();
  unsigned NewCCMask, ShiftVal;
  if (NewC.ICmpType != SystemZICMP::SignedOnly &&
      NewC.Op0.getOpcode() == ISD::SHL &&
      isSimpleShift(NewC.Op0, ShiftVal) &&
      (MaskVal >> ShiftVal != 0) &&
      ((CmpVal >> ShiftVal) << ShiftVal) == CmpVal &&
      (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                        MaskVal >> ShiftVal,
                                        CmpVal >> ShiftVal,
                                        SystemZICMP::Any))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal >>= ShiftVal;
  } else if (NewC.ICmpType != SystemZICMP::SignedOnly &&
             NewC.Op0.getOpcode() == ISD::SRL &&
             isSimpleShift(NewC.Op0, ShiftVal) &&
             (MaskVal << ShiftVal != 0) &&
             ((CmpVal << ShiftVal) >> ShiftVal) == CmpVal &&
             (NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask,
                                               MaskVal << ShiftVal,
                                               CmpVal << ShiftVal,
                                               SystemZICMP::UnsignedOnly))) {
    NewC.Op0 = NewC.Op0.getOperand(0);
    MaskVal <<= ShiftVal;
  } else {
    NewCCMask = getTestUnderMaskCond(BitSize, NewC.CCMask, MaskVal, CmpVal,
                                     NewC.ICmpType);
    if (!NewCCMask)
      return;
  }

  // Go ahead and make the change.
  C.Opcode = SystemZISD::TM;
  C.Op0 = NewC.Op0;
  if (Mask && Mask->getZExtValue() == MaskVal)
    C.Op1 = SDValue(Mask, 0);
  else
    C.Op1 = DAG.getConstant(MaskVal, DL, C.Op0.getValueType());
  C.CCValid = SystemZ::CCMASK_TM;
  C.CCMask = NewCCMask;
}

// Implement i128 comparison in vector registers.
static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
                          Comparison &C) {
  if (C.Opcode != SystemZISD::ICMP)
    return;
  if (C.Op0.getValueType() != MVT::i128)
    return;
  if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
    return;

  // (In-)Equality comparisons can be implemented via VCEQGS.
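  // (VCEQGS compares the two 64-bit halves elementwise and sets CC 0 only
  // when every element is equal, so "all equal" is exactly i128 equality.)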
3261 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 3262 C.CCMask == SystemZ::CCMASK_CMP_NE) { 3263 C.Opcode = SystemZISD::VICMPES; 3264 C.Op0 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op0); 3265 C.Op1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, C.Op1); 3266 C.CCValid = SystemZ::CCMASK_VCMP; 3267 if (C.CCMask == SystemZ::CCMASK_CMP_EQ) 3268 C.CCMask = SystemZ::CCMASK_VCMP_ALL; 3269 else 3270 C.CCMask = SystemZ::CCMASK_VCMP_ALL ^ C.CCValid; 3271 return; 3272 } 3273 3274 // Normalize other comparisons to GT. 3275 bool Swap = false, Invert = false; 3276 switch (C.CCMask) { 3277 case SystemZ::CCMASK_CMP_GT: break; 3278 case SystemZ::CCMASK_CMP_LT: Swap = true; break; 3279 case SystemZ::CCMASK_CMP_LE: Invert = true; break; 3280 case SystemZ::CCMASK_CMP_GE: Swap = Invert = true; break; 3281 default: llvm_unreachable("Invalid integer condition!"); 3282 } 3283 if (Swap) 3284 std::swap(C.Op0, C.Op1); 3285 3286 if (C.ICmpType == SystemZICMP::UnsignedOnly) 3287 C.Opcode = SystemZISD::UCMP128HI; 3288 else 3289 C.Opcode = SystemZISD::SCMP128HI; 3290 C.CCValid = SystemZ::CCMASK_ANY; 3291 C.CCMask = SystemZ::CCMASK_1; 3292 3293 if (Invert) 3294 C.CCMask ^= C.CCValid; 3295 } 3296 3297 // See whether the comparison argument contains a redundant AND 3298 // and remove it if so. This sometimes happens due to the generic 3299 // BRCOND expansion. 3300 static void adjustForRedundantAnd(SelectionDAG &DAG, const SDLoc &DL, 3301 Comparison &C) { 3302 if (C.Op0.getOpcode() != ISD::AND) 3303 return; 3304 auto *Mask = dyn_cast<ConstantSDNode>(C.Op0.getOperand(1)); 3305 if (!Mask || Mask->getValueSizeInBits(0) > 64) 3306 return; 3307 KnownBits Known = DAG.computeKnownBits(C.Op0.getOperand(0)); 3308 if ((~Known.Zero).getZExtValue() & ~Mask->getZExtValue()) 3309 return; 3310 3311 C.Op0 = C.Op0.getOperand(0); 3312 } 3313 3314 // Return a Comparison that tests the condition-code result of intrinsic 3315 // node Call against constant integer CC using comparison code Cond. 3316 // Opcode is the opcode of the SystemZISD operation for the intrinsic 3317 // and CCValid is the set of possible condition-code results. 3318 static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, 3319 SDValue Call, unsigned CCValid, uint64_t CC, 3320 ISD::CondCode Cond) { 3321 Comparison C(Call, SDValue(), SDValue()); 3322 C.Opcode = Opcode; 3323 C.CCValid = CCValid; 3324 if (Cond == ISD::SETEQ) 3325 // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. 3326 C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; 3327 else if (Cond == ISD::SETNE) 3328 // ...and the inverse of that. 3329 C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; 3330 else if (Cond == ISD::SETLT || Cond == ISD::SETULT) 3331 // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, 3332 // always true for CC>3. 3333 C.CCMask = CC < 4 ? ~0U << (4 - CC) : -1; 3334 else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) 3335 // ...and the inverse of that. 3336 C.CCMask = CC < 4 ? ~(~0U << (4 - CC)) : 0; 3337 else if (Cond == ISD::SETLE || Cond == ISD::SETULE) 3338 // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), 3339 // always true for CC>3. 3340 C.CCMask = CC < 4 ? ~0U << (3 - CC) : -1; 3341 else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) 3342 // ...and the inverse of that. 3343 C.CCMask = CC < 4 ? ~(~0U << (3 - CC)) : 0; 3344 else 3345 llvm_unreachable("Unexpected integer comparison type"); 3346 C.CCMask &= CCValid; 3347 return C; 3348 } 3349 3350 // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. 
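// A raw Comparison is built first, the adjust* helpers above then refine it,
// the operands are swapped if that looks profitable, and finally the TEST
// UNDER MASK and i128 rewrites are applied.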
3351 static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, 3352 ISD::CondCode Cond, const SDLoc &DL, 3353 SDValue Chain = SDValue(), 3354 bool IsSignaling = false) { 3355 if (CmpOp1.getOpcode() == ISD::Constant) { 3356 assert(!Chain); 3357 unsigned Opcode, CCValid; 3358 if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && 3359 CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && 3360 isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) 3361 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, 3362 CmpOp1->getAsZExtVal(), Cond); 3363 if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 3364 CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && 3365 isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) 3366 return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, 3367 CmpOp1->getAsZExtVal(), Cond); 3368 } 3369 Comparison C(CmpOp0, CmpOp1, Chain); 3370 C.CCMask = CCMaskForCondCode(Cond); 3371 if (C.Op0.getValueType().isFloatingPoint()) { 3372 C.CCValid = SystemZ::CCMASK_FCMP; 3373 if (!C.Chain) 3374 C.Opcode = SystemZISD::FCMP; 3375 else if (!IsSignaling) 3376 C.Opcode = SystemZISD::STRICT_FCMP; 3377 else 3378 C.Opcode = SystemZISD::STRICT_FCMPS; 3379 adjustForFNeg(C); 3380 } else { 3381 assert(!C.Chain); 3382 C.CCValid = SystemZ::CCMASK_ICMP; 3383 C.Opcode = SystemZISD::ICMP; 3384 // Choose the type of comparison. Equality and inequality tests can 3385 // use either signed or unsigned comparisons. The choice also doesn't 3386 // matter if both sign bits are known to be clear. In those cases we 3387 // want to give the main isel code the freedom to choose whichever 3388 // form fits best. 3389 if (C.CCMask == SystemZ::CCMASK_CMP_EQ || 3390 C.CCMask == SystemZ::CCMASK_CMP_NE || 3391 (DAG.SignBitIsZero(C.Op0) && DAG.SignBitIsZero(C.Op1))) 3392 C.ICmpType = SystemZICMP::Any; 3393 else if (C.CCMask & SystemZ::CCMASK_CMP_UO) 3394 C.ICmpType = SystemZICMP::UnsignedOnly; 3395 else 3396 C.ICmpType = SystemZICMP::SignedOnly; 3397 C.CCMask &= ~SystemZ::CCMASK_CMP_UO; 3398 adjustForRedundantAnd(DAG, DL, C); 3399 adjustZeroCmp(DAG, DL, C); 3400 adjustSubwordCmp(DAG, DL, C); 3401 adjustForSubtraction(DAG, DL, C); 3402 adjustForLTGFR(C); 3403 adjustICmpTruncate(DAG, DL, C); 3404 } 3405 3406 if (shouldSwapCmpOperands(C)) { 3407 std::swap(C.Op0, C.Op1); 3408 C.CCMask = SystemZ::reverseCCMask(C.CCMask); 3409 } 3410 3411 adjustForTestUnderMask(DAG, DL, C); 3412 adjustICmp128(DAG, DL, C); 3413 return C; 3414 } 3415 3416 // Emit the comparison instruction described by C. 
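// The returned value is the node that defines CC; callers combine it with
// C.CCValid and C.CCMask in BR_CCMASK or SELECT_CCMASK nodes.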
3417 static SDValue emitCmp(SelectionDAG &DAG, const SDLoc &DL, Comparison &C) { 3418 if (!C.Op1.getNode()) { 3419 SDNode *Node; 3420 switch (C.Op0.getOpcode()) { 3421 case ISD::INTRINSIC_W_CHAIN: 3422 Node = emitIntrinsicWithCCAndChain(DAG, C.Op0, C.Opcode); 3423 return SDValue(Node, 0); 3424 case ISD::INTRINSIC_WO_CHAIN: 3425 Node = emitIntrinsicWithCC(DAG, C.Op0, C.Opcode); 3426 return SDValue(Node, Node->getNumValues() - 1); 3427 default: 3428 llvm_unreachable("Invalid comparison operands"); 3429 } 3430 } 3431 if (C.Opcode == SystemZISD::ICMP) 3432 return DAG.getNode(SystemZISD::ICMP, DL, MVT::i32, C.Op0, C.Op1, 3433 DAG.getTargetConstant(C.ICmpType, DL, MVT::i32)); 3434 if (C.Opcode == SystemZISD::TM) { 3435 bool RegisterOnly = (bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_0) != 3436 bool(C.CCMask & SystemZ::CCMASK_TM_MIXED_MSB_1)); 3437 return DAG.getNode(SystemZISD::TM, DL, MVT::i32, C.Op0, C.Op1, 3438 DAG.getTargetConstant(RegisterOnly, DL, MVT::i32)); 3439 } 3440 if (C.Opcode == SystemZISD::VICMPES) { 3441 SDVTList VTs = DAG.getVTList(C.Op0.getValueType(), MVT::i32); 3442 SDValue Val = DAG.getNode(C.Opcode, DL, VTs, C.Op0, C.Op1); 3443 return SDValue(Val.getNode(), 1); 3444 } 3445 if (C.Chain) { 3446 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); 3447 return DAG.getNode(C.Opcode, DL, VTs, C.Chain, C.Op0, C.Op1); 3448 } 3449 return DAG.getNode(C.Opcode, DL, MVT::i32, C.Op0, C.Op1); 3450 } 3451 3452 // Implement a 32-bit *MUL_LOHI operation by extending both operands to 3453 // 64 bits. Extend is the extension type to use. Store the high part 3454 // in Hi and the low part in Lo. 3455 static void lowerMUL_LOHI32(SelectionDAG &DAG, const SDLoc &DL, unsigned Extend, 3456 SDValue Op0, SDValue Op1, SDValue &Hi, 3457 SDValue &Lo) { 3458 Op0 = DAG.getNode(Extend, DL, MVT::i64, Op0); 3459 Op1 = DAG.getNode(Extend, DL, MVT::i64, Op1); 3460 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, Op0, Op1); 3461 Hi = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul, 3462 DAG.getConstant(32, DL, MVT::i64)); 3463 Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Hi); 3464 Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Mul); 3465 } 3466 3467 // Lower a binary operation that produces two VT results, one in each 3468 // half of a GR128 pair. Op0 and Op1 are the VT operands to the operation, 3469 // and Opcode performs the GR128 operation. Store the even register result 3470 // in Even and the odd register result in Odd. 3471 static void lowerGR128Binary(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 3472 unsigned Opcode, SDValue Op0, SDValue Op1, 3473 SDValue &Even, SDValue &Odd) { 3474 SDValue Result = DAG.getNode(Opcode, DL, MVT::Untyped, Op0, Op1); 3475 bool Is32Bit = is32Bit(VT); 3476 Even = DAG.getTargetExtractSubreg(SystemZ::even128(Is32Bit), DL, VT, Result); 3477 Odd = DAG.getTargetExtractSubreg(SystemZ::odd128(Is32Bit), DL, VT, Result); 3478 } 3479 3480 // Return an i32 value that is 1 if the CC value produced by CCReg is 3481 // in the mask CCMask and 0 otherwise. CC is known to have a value 3482 // in CCValid, so other values can be ignored. 
static SDValue emitSETCC(SelectionDAG &DAG, const SDLoc &DL, SDValue CCReg,
                         unsigned CCValid, unsigned CCMask) {
  SDValue Ops[] = {DAG.getConstant(1, DL, MVT::i32),
                   DAG.getConstant(0, DL, MVT::i32),
                   DAG.getTargetConstant(CCValid, DL, MVT::i32),
                   DAG.getTargetConstant(CCMask, DL, MVT::i32), CCReg};
  return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, MVT::i32, Ops);
}

// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
// be done directly. Mode is CmpMode::Int for integer comparisons, CmpMode::FP
// for regular floating-point comparisons, CmpMode::StrictFP for strict (quiet)
// floating-point comparisons, and CmpMode::SignalingFP for strict signaling
// floating-point comparisons.
enum class CmpMode { Int, FP, StrictFP, SignalingFP };
static unsigned getVectorComparison(ISD::CondCode CC, CmpMode Mode) {
  switch (CC) {
  case ISD::SETOEQ:
  case ISD::SETEQ:
    switch (Mode) {
    case CmpMode::Int:         return SystemZISD::VICMPE;
    case CmpMode::FP:          return SystemZISD::VFCMPE;
    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPE;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPES;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETOGE:
  case ISD::SETGE:
    switch (Mode) {
    case CmpMode::Int:         return 0;
    case CmpMode::FP:          return SystemZISD::VFCMPHE;
    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPHE;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHES;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETOGT:
  case ISD::SETGT:
    switch (Mode) {
    case CmpMode::Int:         return SystemZISD::VICMPH;
    case CmpMode::FP:          return SystemZISD::VFCMPH;
    case CmpMode::StrictFP:    return SystemZISD::STRICT_VFCMPH;
    case CmpMode::SignalingFP: return SystemZISD::STRICT_VFCMPHS;
    }
    llvm_unreachable("Bad mode");

  case ISD::SETUGT:
    switch (Mode) {
    case CmpMode::Int:         return SystemZISD::VICMPHL;
    case CmpMode::FP:          return 0;
    case CmpMode::StrictFP:    return 0;
    case CmpMode::SignalingFP: return 0;
    }
    llvm_unreachable("Bad mode");

  default:
    return 0;
  }
}

// Return the SystemZISD vector comparison operation for CC or its inverse,
// or 0 if neither can be done directly. Indicate in Invert whether the
// result is for the inverse of CC. Mode is as above.
static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, CmpMode Mode,
                                            bool &Invert) {
  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
    Invert = false;
    return Opcode;
  }

  CC = ISD::getSetCCInverse(CC, Mode == CmpMode::Int ? MVT::i32 : MVT::f32);
  if (unsigned Opcode = getVectorComparison(CC, Mode)) {
    Invert = true;
    return Opcode;
  }

  return 0;
}

// Return a v2f64 that contains the extended form of elements Start and Start+1
// of v4f32 value Op. If Chain is nonnull, return the strict form.
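// For example, Start == 2 shuffles elements 2 and 3 into the even lanes of
// the vector and then widens those lanes to f64 with (STRICT_)VEXTEND.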
3565 static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, const SDLoc &DL, 3566 SDValue Op, SDValue Chain) { 3567 int Mask[] = { Start, -1, Start + 1, -1 }; 3568 Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); 3569 if (Chain) { 3570 SDVTList VTs = DAG.getVTList(MVT::v2f64, MVT::Other); 3571 return DAG.getNode(SystemZISD::STRICT_VEXTEND, DL, VTs, Chain, Op); 3572 } 3573 return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); 3574 } 3575 3576 // Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, 3577 // producing a result of type VT. If Chain is nonnull, return the strict form. 3578 SDValue SystemZTargetLowering::getVectorCmp(SelectionDAG &DAG, unsigned Opcode, 3579 const SDLoc &DL, EVT VT, 3580 SDValue CmpOp0, 3581 SDValue CmpOp1, 3582 SDValue Chain) const { 3583 // There is no hardware support for v4f32 (unless we have the vector 3584 // enhancements facility 1), so extend the vector into two v2f64s 3585 // and compare those. 3586 if (CmpOp0.getValueType() == MVT::v4f32 && 3587 !Subtarget.hasVectorEnhancements1()) { 3588 SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0, Chain); 3589 SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0, Chain); 3590 SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1, Chain); 3591 SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1, Chain); 3592 if (Chain) { 3593 SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::Other); 3594 SDValue HRes = DAG.getNode(Opcode, DL, VTs, Chain, H0, H1); 3595 SDValue LRes = DAG.getNode(Opcode, DL, VTs, Chain, L0, L1); 3596 SDValue Res = DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 3597 SDValue Chains[6] = { H0.getValue(1), L0.getValue(1), 3598 H1.getValue(1), L1.getValue(1), 3599 HRes.getValue(1), LRes.getValue(1) }; 3600 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); 3601 SDValue Ops[2] = { Res, NewChain }; 3602 return DAG.getMergeValues(Ops, DL); 3603 } 3604 SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); 3605 SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); 3606 return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); 3607 } 3608 if (Chain) { 3609 SDVTList VTs = DAG.getVTList(VT, MVT::Other); 3610 return DAG.getNode(Opcode, DL, VTs, Chain, CmpOp0, CmpOp1); 3611 } 3612 return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); 3613 } 3614 3615 // Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing 3616 // an integer mask of type VT. If Chain is nonnull, we have a strict 3617 // floating-point comparison. If in addition IsSignaling is true, we have 3618 // a strict signaling floating-point comparison. 3619 SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG, 3620 const SDLoc &DL, EVT VT, 3621 ISD::CondCode CC, 3622 SDValue CmpOp0, 3623 SDValue CmpOp1, 3624 SDValue Chain, 3625 bool IsSignaling) const { 3626 bool IsFP = CmpOp0.getValueType().isFloatingPoint(); 3627 assert (!Chain || IsFP); 3628 assert (!IsSignaling || Chain); 3629 CmpMode Mode = IsSignaling ? CmpMode::SignalingFP : 3630 Chain ? CmpMode::StrictFP : IsFP ? CmpMode::FP : CmpMode::Int; 3631 bool Invert = false; 3632 SDValue Cmp; 3633 switch (CC) { 3634 // Handle tests for order using (or (ogt y x) (oge x y)). 
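  // For ordered (non-NaN) operands one of x < y and x >= y always holds,
  // while both are false if either operand is a NaN, so the OR is true
  // exactly when the operands are ordered.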
3635 case ISD::SETUO: 3636 Invert = true; 3637 [[fallthrough]]; 3638 case ISD::SETO: { 3639 assert(IsFP && "Unexpected integer comparison"); 3640 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3641 DL, VT, CmpOp1, CmpOp0, Chain); 3642 SDValue GE = getVectorCmp(DAG, getVectorComparison(ISD::SETOGE, Mode), 3643 DL, VT, CmpOp0, CmpOp1, Chain); 3644 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); 3645 if (Chain) 3646 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 3647 LT.getValue(1), GE.getValue(1)); 3648 break; 3649 } 3650 3651 // Handle <> tests using (or (ogt y x) (ogt x y)). 3652 case ISD::SETUEQ: 3653 Invert = true; 3654 [[fallthrough]]; 3655 case ISD::SETONE: { 3656 assert(IsFP && "Unexpected integer comparison"); 3657 SDValue LT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3658 DL, VT, CmpOp1, CmpOp0, Chain); 3659 SDValue GT = getVectorCmp(DAG, getVectorComparison(ISD::SETOGT, Mode), 3660 DL, VT, CmpOp0, CmpOp1, Chain); 3661 Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); 3662 if (Chain) 3663 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 3664 LT.getValue(1), GT.getValue(1)); 3665 break; 3666 } 3667 3668 // Otherwise a single comparison is enough. It doesn't really 3669 // matter whether we try the inversion or the swap first, since 3670 // there are no cases where both work. 3671 default: 3672 // Optimize sign-bit comparisons to signed compares. 3673 if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) && 3674 ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) { 3675 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3676 APInt Mask; 3677 if (CmpOp0.getOpcode() == ISD::AND 3678 && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask) 3679 && Mask == APInt::getSignMask(EltSize)) { 3680 CC = CC == ISD::SETEQ ? 
ISD::SETGE : ISD::SETLT; 3681 CmpOp0 = CmpOp0.getOperand(0); 3682 } 3683 } 3684 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) 3685 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain); 3686 else { 3687 CC = ISD::getSetCCSwappedOperands(CC); 3688 if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert)) 3689 Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0, Chain); 3690 else 3691 llvm_unreachable("Unhandled comparison"); 3692 } 3693 if (Chain) 3694 Chain = Cmp.getValue(1); 3695 break; 3696 } 3697 if (Invert) { 3698 SDValue Mask = 3699 DAG.getSplatBuildVector(VT, DL, DAG.getAllOnesConstant(DL, MVT::i64)); 3700 Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); 3701 } 3702 if (Chain && Chain.getNode() != Cmp.getNode()) { 3703 SDValue Ops[2] = { Cmp, Chain }; 3704 Cmp = DAG.getMergeValues(Ops, DL); 3705 } 3706 return Cmp; 3707 } 3708 3709 SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, 3710 SelectionDAG &DAG) const { 3711 SDValue CmpOp0 = Op.getOperand(0); 3712 SDValue CmpOp1 = Op.getOperand(1); 3713 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 3714 SDLoc DL(Op); 3715 EVT VT = Op.getValueType(); 3716 if (VT.isVector()) 3717 return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); 3718 3719 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3720 SDValue CCReg = emitCmp(DAG, DL, C); 3721 return emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); 3722 } 3723 3724 SDValue SystemZTargetLowering::lowerSTRICT_FSETCC(SDValue Op, 3725 SelectionDAG &DAG, 3726 bool IsSignaling) const { 3727 SDValue Chain = Op.getOperand(0); 3728 SDValue CmpOp0 = Op.getOperand(1); 3729 SDValue CmpOp1 = Op.getOperand(2); 3730 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(3))->get(); 3731 SDLoc DL(Op); 3732 EVT VT = Op.getNode()->getValueType(0); 3733 if (VT.isVector()) { 3734 SDValue Res = lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1, 3735 Chain, IsSignaling); 3736 return Res.getValue(Op.getResNo()); 3737 } 3738 3739 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL, Chain, IsSignaling)); 3740 SDValue CCReg = emitCmp(DAG, DL, C); 3741 CCReg->setFlags(Op->getFlags()); 3742 SDValue Result = emitSETCC(DAG, DL, CCReg, C.CCValid, C.CCMask); 3743 SDValue Ops[2] = { Result, CCReg.getValue(1) }; 3744 return DAG.getMergeValues(Ops, DL); 3745 } 3746 3747 SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3748 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3749 SDValue CmpOp0 = Op.getOperand(2); 3750 SDValue CmpOp1 = Op.getOperand(3); 3751 SDValue Dest = Op.getOperand(4); 3752 SDLoc DL(Op); 3753 3754 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3755 SDValue CCReg = emitCmp(DAG, DL, C); 3756 return DAG.getNode( 3757 SystemZISD::BR_CCMASK, DL, Op.getValueType(), Op.getOperand(0), 3758 DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 3759 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), Dest, CCReg); 3760 } 3761 3762 // Return true if Pos is CmpOp and Neg is the negative of CmpOp, 3763 // allowing Pos and Neg to be wider than CmpOp. 3764 static bool isAbsolute(SDValue CmpOp, SDValue Pos, SDValue Neg) { 3765 return (Neg.getOpcode() == ISD::SUB && 3766 Neg.getOperand(0).getOpcode() == ISD::Constant && 3767 Neg.getConstantOperandVal(0) == 0 && Neg.getOperand(1) == Pos && 3768 (Pos == CmpOp || (Pos.getOpcode() == ISD::SIGN_EXTEND && 3769 Pos.getOperand(0) == CmpOp))); 3770 } 3771 3772 // Return the absolute or negative absolute of Op; IsNegative decides which. 
3773 static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op, 3774 bool IsNegative) { 3775 Op = DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op); 3776 if (IsNegative) 3777 Op = DAG.getNode(ISD::SUB, DL, Op.getValueType(), 3778 DAG.getConstant(0, DL, Op.getValueType()), Op); 3779 return Op; 3780 } 3781 3782 static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL, 3783 Comparison C, SDValue TrueOp, SDValue FalseOp) { 3784 EVT VT = MVT::i128; 3785 unsigned Op; 3786 3787 if (C.CCMask == SystemZ::CCMASK_CMP_NE || 3788 C.CCMask == SystemZ::CCMASK_CMP_GE || 3789 C.CCMask == SystemZ::CCMASK_CMP_LE) { 3790 std::swap(TrueOp, FalseOp); 3791 C.CCMask ^= C.CCValid; 3792 } 3793 if (C.CCMask == SystemZ::CCMASK_CMP_LT) { 3794 std::swap(C.Op0, C.Op1); 3795 C.CCMask = SystemZ::CCMASK_CMP_GT; 3796 } 3797 switch (C.CCMask) { 3798 case SystemZ::CCMASK_CMP_EQ: 3799 Op = SystemZISD::VICMPE; 3800 break; 3801 case SystemZ::CCMASK_CMP_GT: 3802 if (C.ICmpType == SystemZICMP::UnsignedOnly) 3803 Op = SystemZISD::VICMPHL; 3804 else 3805 Op = SystemZISD::VICMPH; 3806 break; 3807 default: 3808 llvm_unreachable("Unhandled comparison"); 3809 break; 3810 } 3811 3812 SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1); 3813 TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask); 3814 FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT)); 3815 return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp); 3816 } 3817 3818 SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op, 3819 SelectionDAG &DAG) const { 3820 SDValue CmpOp0 = Op.getOperand(0); 3821 SDValue CmpOp1 = Op.getOperand(1); 3822 SDValue TrueOp = Op.getOperand(2); 3823 SDValue FalseOp = Op.getOperand(3); 3824 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 3825 SDLoc DL(Op); 3826 3827 Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC, DL)); 3828 3829 // Check for absolute and negative-absolute selections, including those 3830 // where the comparison value is sign-extended (for LPGFR and LNGFR). 3831 // This check supplements the one in DAGCombiner. 
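  // (LOAD POSITIVE and LOAD NEGATIVE also exist in 64<-32 forms, which is
  // why a sign-extended comparison value is accepted here.)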
3832 if (C.Opcode == SystemZISD::ICMP && C.CCMask != SystemZ::CCMASK_CMP_EQ && 3833 C.CCMask != SystemZ::CCMASK_CMP_NE && 3834 C.Op1.getOpcode() == ISD::Constant && 3835 cast<ConstantSDNode>(C.Op1)->getValueSizeInBits(0) <= 64 && 3836 C.Op1->getAsZExtVal() == 0) { 3837 if (isAbsolute(C.Op0, TrueOp, FalseOp)) 3838 return getAbsolute(DAG, DL, TrueOp, C.CCMask & SystemZ::CCMASK_CMP_LT); 3839 if (isAbsolute(C.Op0, FalseOp, TrueOp)) 3840 return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT); 3841 } 3842 3843 if (Subtarget.hasVectorEnhancements3() && 3844 C.Opcode == SystemZISD::ICMP && 3845 C.Op0.getValueType() == MVT::i128 && 3846 TrueOp.getValueType() == MVT::i128) { 3847 return getI128Select(DAG, DL, C, TrueOp, FalseOp); 3848 } 3849 3850 SDValue CCReg = emitCmp(DAG, DL, C); 3851 SDValue Ops[] = {TrueOp, FalseOp, 3852 DAG.getTargetConstant(C.CCValid, DL, MVT::i32), 3853 DAG.getTargetConstant(C.CCMask, DL, MVT::i32), CCReg}; 3854 3855 return DAG.getNode(SystemZISD::SELECT_CCMASK, DL, Op.getValueType(), Ops); 3856 } 3857 3858 SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, 3859 SelectionDAG &DAG) const { 3860 SDLoc DL(Node); 3861 const GlobalValue *GV = Node->getGlobal(); 3862 int64_t Offset = Node->getOffset(); 3863 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3864 CodeModel::Model CM = DAG.getTarget().getCodeModel(); 3865 3866 SDValue Result; 3867 if (Subtarget.isPC32DBLSymbol(GV, CM)) { 3868 if (isInt<32>(Offset)) { 3869 // Assign anchors at 1<<12 byte boundaries. 3870 uint64_t Anchor = Offset & ~uint64_t(0xfff); 3871 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor); 3872 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3873 3874 // The offset can be folded into the address if it is aligned to a 3875 // halfword. 3876 Offset -= Anchor; 3877 if (Offset != 0 && (Offset & 1) == 0) { 3878 SDValue Full = 3879 DAG.getTargetGlobalAddress(GV, DL, PtrVT, Anchor + Offset); 3880 Result = DAG.getNode(SystemZISD::PCREL_OFFSET, DL, PtrVT, Full, Result); 3881 Offset = 0; 3882 } 3883 } else { 3884 // Conservatively load a constant offset greater than 32 bits into a 3885 // register below. 3886 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT); 3887 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3888 } 3889 } else if (Subtarget.isTargetELF()) { 3890 Result = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, SystemZII::MO_GOT); 3891 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 3892 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, 3893 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 3894 } else if (Subtarget.isTargetzOS()) { 3895 Result = getADAEntry(DAG, GV, DL, PtrVT); 3896 } else 3897 llvm_unreachable("Unexpected Subtarget"); 3898 3899 // If there was a non-zero offset that we didn't fold, create an explicit 3900 // addition for it. 
3901 if (Offset != 0) 3902 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 3903 DAG.getSignedConstant(Offset, DL, PtrVT)); 3904 3905 return Result; 3906 } 3907 3908 SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, 3909 SelectionDAG &DAG, 3910 unsigned Opcode, 3911 SDValue GOTOffset) const { 3912 SDLoc DL(Node); 3913 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3914 SDValue Chain = DAG.getEntryNode(); 3915 SDValue Glue; 3916 3917 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3918 CallingConv::GHC) 3919 report_fatal_error("In GHC calling convention TLS is not supported"); 3920 3921 // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. 3922 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 3923 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); 3924 Glue = Chain.getValue(1); 3925 Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); 3926 Glue = Chain.getValue(1); 3927 3928 // The first call operand is the chain and the second is the TLS symbol. 3929 SmallVector<SDValue, 8> Ops; 3930 Ops.push_back(Chain); 3931 Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, 3932 Node->getValueType(0), 3933 0, 0)); 3934 3935 // Add argument registers to the end of the list so that they are 3936 // known live into the call. 3937 Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); 3938 Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); 3939 3940 // Add a register mask operand representing the call-preserved registers. 3941 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 3942 const uint32_t *Mask = 3943 TRI->getCallPreservedMask(DAG.getMachineFunction(), CallingConv::C); 3944 assert(Mask && "Missing call preserved mask for calling convention"); 3945 Ops.push_back(DAG.getRegisterMask(Mask)); 3946 3947 // Glue the call to the argument copies. 3948 Ops.push_back(Glue); 3949 3950 // Emit the call. 3951 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 3952 Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); 3953 Glue = Chain.getValue(1); 3954 3955 // Copy the return value from %r2. 3956 return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); 3957 } 3958 3959 SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL, 3960 SelectionDAG &DAG) const { 3961 SDValue Chain = DAG.getEntryNode(); 3962 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3963 3964 // The high part of the thread pointer is in access register 0. 3965 SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32); 3966 TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi); 3967 3968 // The low part of the thread pointer is in access register 1. 3969 SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32); 3970 TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo); 3971 3972 // Merge them into a single 64-bit address. 
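  // i.e. TP = (TPHi << 32) | TPLo.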
3973 SDValue TPHiShifted = DAG.getNode(ISD::SHL, DL, PtrVT, TPHi, 3974 DAG.getConstant(32, DL, PtrVT)); 3975 return DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); 3976 } 3977 3978 SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, 3979 SelectionDAG &DAG) const { 3980 if (DAG.getTarget().useEmulatedTLS()) 3981 return LowerToTLSEmulatedModel(Node, DAG); 3982 SDLoc DL(Node); 3983 const GlobalValue *GV = Node->getGlobal(); 3984 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 3985 TLSModel::Model model = DAG.getTarget().getTLSModel(GV); 3986 3987 if (DAG.getMachineFunction().getFunction().getCallingConv() == 3988 CallingConv::GHC) 3989 report_fatal_error("In GHC calling convention TLS is not supported"); 3990 3991 SDValue TP = lowerThreadPointer(DL, DAG); 3992 3993 // Get the offset of GA from the thread pointer, based on the TLS model. 3994 SDValue Offset; 3995 switch (model) { 3996 case TLSModel::GeneralDynamic: { 3997 // Load the GOT offset of the tls_index (module ID / per-symbol offset). 3998 SystemZConstantPoolValue *CPV = 3999 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); 4000 4001 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 4002 Offset = DAG.getLoad( 4003 PtrVT, DL, DAG.getEntryNode(), Offset, 4004 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 4005 4006 // Call __tls_get_offset to retrieve the offset. 4007 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); 4008 break; 4009 } 4010 4011 case TLSModel::LocalDynamic: { 4012 // Load the GOT offset of the module ID. 4013 SystemZConstantPoolValue *CPV = 4014 SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); 4015 4016 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 4017 Offset = DAG.getLoad( 4018 PtrVT, DL, DAG.getEntryNode(), Offset, 4019 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 4020 4021 // Call __tls_get_offset to retrieve the module base offset. 4022 Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); 4023 4024 // Note: The SystemZLDCleanupPass will remove redundant computations 4025 // of the module base offset. Count total number of local-dynamic 4026 // accesses to trigger execution of that pass. 4027 SystemZMachineFunctionInfo* MFI = 4028 DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>(); 4029 MFI->incNumLocalDynamicTLSAccesses(); 4030 4031 // Add the per-symbol offset. 4032 CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); 4033 4034 SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 4035 DTPOffset = DAG.getLoad( 4036 PtrVT, DL, DAG.getEntryNode(), DTPOffset, 4037 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 4038 4039 Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); 4040 break; 4041 } 4042 4043 case TLSModel::InitialExec: { 4044 // Load the offset from the GOT. 4045 Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 4046 SystemZII::MO_INDNTPOFF); 4047 Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); 4048 Offset = 4049 DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Offset, 4050 MachinePointerInfo::getGOT(DAG.getMachineFunction())); 4051 break; 4052 } 4053 4054 case TLSModel::LocalExec: { 4055 // Force the offset into the constant pool and load it from there. 
4056 SystemZConstantPoolValue *CPV = 4057 SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); 4058 4059 Offset = DAG.getConstantPool(CPV, PtrVT, Align(8)); 4060 Offset = DAG.getLoad( 4061 PtrVT, DL, DAG.getEntryNode(), Offset, 4062 MachinePointerInfo::getConstantPool(DAG.getMachineFunction())); 4063 break; 4064 } 4065 } 4066 4067 // Add the base and offset together. 4068 return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); 4069 } 4070 4071 SDValue SystemZTargetLowering::lowerBlockAddress(BlockAddressSDNode *Node, 4072 SelectionDAG &DAG) const { 4073 SDLoc DL(Node); 4074 const BlockAddress *BA = Node->getBlockAddress(); 4075 int64_t Offset = Node->getOffset(); 4076 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4077 4078 SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset); 4079 Result = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 4080 return Result; 4081 } 4082 4083 SDValue SystemZTargetLowering::lowerJumpTable(JumpTableSDNode *JT, 4084 SelectionDAG &DAG) const { 4085 SDLoc DL(JT); 4086 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4087 SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 4088 4089 // Use LARL to load the address of the table. 4090 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 4091 } 4092 4093 SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP, 4094 SelectionDAG &DAG) const { 4095 SDLoc DL(CP); 4096 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4097 4098 SDValue Result; 4099 if (CP->isMachineConstantPoolEntry()) 4100 Result = 4101 DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, CP->getAlign()); 4102 else 4103 Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, CP->getAlign(), 4104 CP->getOffset()); 4105 4106 // Use LARL to load the address of the constant pool entry. 4107 return DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Result); 4108 } 4109 4110 SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op, 4111 SelectionDAG &DAG) const { 4112 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 4113 MachineFunction &MF = DAG.getMachineFunction(); 4114 MachineFrameInfo &MFI = MF.getFrameInfo(); 4115 MFI.setFrameAddressIsTaken(true); 4116 4117 SDLoc DL(Op); 4118 unsigned Depth = Op.getConstantOperandVal(0); 4119 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4120 4121 // By definition, the frame address is the address of the back chain. (In 4122 // the case of packed stack without backchain, return the address where the 4123 // backchain would have been stored. This will either be an unused space or 4124 // contain a saved register). 4125 int BackChainIdx = TFL->getOrCreateFramePointerSaveIndex(MF); 4126 SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT); 4127 4128 if (Depth > 0) { 4129 // FIXME The frontend should detect this case. 
4130 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) 4131 report_fatal_error("Unsupported stack frame traversal count"); 4132 4133 SDValue Offset = DAG.getConstant(TFL->getBackchainOffset(MF), DL, PtrVT); 4134 while (Depth--) { 4135 BackChain = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), BackChain, 4136 MachinePointerInfo()); 4137 BackChain = DAG.getNode(ISD::ADD, DL, PtrVT, BackChain, Offset); 4138 } 4139 } 4140 4141 return BackChain; 4142 } 4143 4144 SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op, 4145 SelectionDAG &DAG) const { 4146 MachineFunction &MF = DAG.getMachineFunction(); 4147 MachineFrameInfo &MFI = MF.getFrameInfo(); 4148 MFI.setReturnAddressIsTaken(true); 4149 4150 if (verifyReturnAddressArgumentIsConstant(Op, DAG)) 4151 return SDValue(); 4152 4153 SDLoc DL(Op); 4154 unsigned Depth = Op.getConstantOperandVal(0); 4155 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4156 4157 if (Depth > 0) { 4158 // FIXME The frontend should detect this case. 4159 if (!MF.getSubtarget<SystemZSubtarget>().hasBackChain()) 4160 report_fatal_error("Unsupported stack frame traversal count"); 4161 4162 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG); 4163 const auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 4164 int Offset = TFL->getReturnAddressOffset(MF); 4165 SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, FrameAddr, 4166 DAG.getSignedConstant(Offset, DL, PtrVT)); 4167 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, 4168 MachinePointerInfo()); 4169 } 4170 4171 // Return R14D (Elf) / R7D (XPLINK), which has the return address. Mark it an 4172 // implicit live-in. 4173 SystemZCallingConventionRegisters *CCR = Subtarget.getSpecialRegisters(); 4174 Register LinkReg = MF.addLiveIn(CCR->getReturnFunctionAddressRegister(), 4175 &SystemZ::GR64BitRegClass); 4176 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, LinkReg, PtrVT); 4177 } 4178 4179 SDValue SystemZTargetLowering::lowerBITCAST(SDValue Op, 4180 SelectionDAG &DAG) const { 4181 SDLoc DL(Op); 4182 SDValue In = Op.getOperand(0); 4183 EVT InVT = In.getValueType(); 4184 EVT ResVT = Op.getValueType(); 4185 4186 // Convert loads directly. This is normally done by DAGCombiner, 4187 // but we need this case for bitcasts that are created during lowering 4188 // and which are then lowered themselves. 4189 if (auto *LoadN = dyn_cast<LoadSDNode>(In)) 4190 if (ISD::isNormalLoad(LoadN)) { 4191 SDValue NewLoad = DAG.getLoad(ResVT, DL, LoadN->getChain(), 4192 LoadN->getBasePtr(), LoadN->getMemOperand()); 4193 // Update the chain uses. 
4194 DAG.ReplaceAllUsesOfValueWith(SDValue(LoadN, 1), NewLoad.getValue(1)); 4195 return NewLoad; 4196 } 4197 4198 if (InVT == MVT::i32 && ResVT == MVT::f32) { 4199 SDValue In64; 4200 if (Subtarget.hasHighWord()) { 4201 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, 4202 MVT::i64); 4203 In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 4204 MVT::i64, SDValue(U64, 0), In); 4205 } else { 4206 In64 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, In); 4207 In64 = DAG.getNode(ISD::SHL, DL, MVT::i64, In64, 4208 DAG.getConstant(32, DL, MVT::i64)); 4209 } 4210 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); 4211 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, 4212 DL, MVT::f32, Out64); 4213 } 4214 if (InVT == MVT::f32 && ResVT == MVT::i32) { 4215 SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); 4216 SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, 4217 MVT::f64, SDValue(U64, 0), In); 4218 SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); 4219 if (Subtarget.hasHighWord()) 4220 return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, DL, 4221 MVT::i32, Out64); 4222 SDValue Shift = DAG.getNode(ISD::SRL, DL, MVT::i64, Out64, 4223 DAG.getConstant(32, DL, MVT::i64)); 4224 return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Shift); 4225 } 4226 llvm_unreachable("Unexpected bitcast combination"); 4227 } 4228 4229 SDValue SystemZTargetLowering::lowerVASTART(SDValue Op, 4230 SelectionDAG &DAG) const { 4231 4232 if (Subtarget.isTargetXPLINK64()) 4233 return lowerVASTART_XPLINK(Op, DAG); 4234 else 4235 return lowerVASTART_ELF(Op, DAG); 4236 } 4237 4238 SDValue SystemZTargetLowering::lowerVASTART_XPLINK(SDValue Op, 4239 SelectionDAG &DAG) const { 4240 MachineFunction &MF = DAG.getMachineFunction(); 4241 SystemZMachineFunctionInfo *FuncInfo = 4242 MF.getInfo<SystemZMachineFunctionInfo>(); 4243 4244 SDLoc DL(Op); 4245 4246 // vastart just stores the address of the VarArgsFrameIndex slot into the 4247 // memory location argument. 4248 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4249 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 4250 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 4251 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1), 4252 MachinePointerInfo(SV)); 4253 } 4254 4255 SDValue SystemZTargetLowering::lowerVASTART_ELF(SDValue Op, 4256 SelectionDAG &DAG) const { 4257 MachineFunction &MF = DAG.getMachineFunction(); 4258 SystemZMachineFunctionInfo *FuncInfo = 4259 MF.getInfo<SystemZMachineFunctionInfo>(); 4260 EVT PtrVT = getPointerTy(DAG.getDataLayout()); 4261 4262 SDValue Chain = Op.getOperand(0); 4263 SDValue Addr = Op.getOperand(1); 4264 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 4265 SDLoc DL(Op); 4266 4267 // The initial values of each field. 4268 const unsigned NumFields = 4; 4269 SDValue Fields[NumFields] = { 4270 DAG.getConstant(FuncInfo->getVarArgsFirstGPR(), DL, PtrVT), 4271 DAG.getConstant(FuncInfo->getVarArgsFirstFPR(), DL, PtrVT), 4272 DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT), 4273 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT) 4274 }; 4275 4276 // Store each field into its respective slot. 
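  // Each of the four ELF va_list fields (__gpr, __fpr, __overflow_arg_area
  // and __reg_save_area) occupies 8 bytes.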
4277 SDValue MemOps[NumFields]; 4278 unsigned Offset = 0; 4279 for (unsigned I = 0; I < NumFields; ++I) { 4280 SDValue FieldAddr = Addr; 4281 if (Offset != 0) 4282 FieldAddr = DAG.getNode(ISD::ADD, DL, PtrVT, FieldAddr, 4283 DAG.getIntPtrConstant(Offset, DL)); 4284 MemOps[I] = DAG.getStore(Chain, DL, Fields[I], FieldAddr, 4285 MachinePointerInfo(SV, Offset)); 4286 Offset += 8; 4287 } 4288 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); 4289 } 4290 4291 SDValue SystemZTargetLowering::lowerVACOPY(SDValue Op, 4292 SelectionDAG &DAG) const { 4293 SDValue Chain = Op.getOperand(0); 4294 SDValue DstPtr = Op.getOperand(1); 4295 SDValue SrcPtr = Op.getOperand(2); 4296 const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); 4297 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); 4298 SDLoc DL(Op); 4299 4300 uint32_t Sz = 4301 Subtarget.isTargetXPLINK64() ? getTargetMachine().getPointerSize(0) : 32; 4302 return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(Sz, DL), 4303 Align(8), /*isVolatile*/ false, /*AlwaysInline*/ false, 4304 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(DstSV), 4305 MachinePointerInfo(SrcSV)); 4306 } 4307 4308 SDValue 4309 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op, 4310 SelectionDAG &DAG) const { 4311 if (Subtarget.isTargetXPLINK64()) 4312 return lowerDYNAMIC_STACKALLOC_XPLINK(Op, DAG); 4313 else 4314 return lowerDYNAMIC_STACKALLOC_ELF(Op, DAG); 4315 } 4316 4317 SDValue 4318 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, 4319 SelectionDAG &DAG) const { 4320 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 4321 MachineFunction &MF = DAG.getMachineFunction(); 4322 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); 4323 SDValue Chain = Op.getOperand(0); 4324 SDValue Size = Op.getOperand(1); 4325 SDValue Align = Op.getOperand(2); 4326 SDLoc DL(Op); 4327 4328 // If user has set the no alignment function attribute, ignore 4329 // alloca alignments. 4330 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); 4331 4332 uint64_t StackAlign = TFI->getStackAlignment(); 4333 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 4334 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 4335 4336 SDValue NeededSpace = Size; 4337 4338 // Add extra space for alignment if needed. 4339 EVT PtrVT = getPointerTy(MF.getDataLayout()); 4340 if (ExtraAlignSpace) 4341 NeededSpace = DAG.getNode(ISD::ADD, DL, PtrVT, NeededSpace, 4342 DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); 4343 4344 bool IsSigned = false; 4345 bool DoesNotReturn = false; 4346 bool IsReturnValueUsed = false; 4347 EVT VT = Op.getValueType(); 4348 SDValue AllocaCall = 4349 makeExternalCall(Chain, DAG, "@@ALCAXP", VT, ArrayRef(NeededSpace), 4350 CallingConv::C, IsSigned, DL, DoesNotReturn, 4351 IsReturnValueUsed) 4352 .first; 4353 4354 // Perform a CopyFromReg from %GPR4 (stack pointer register). Chain and Glue 4355 // to end of call in order to ensure it isn't broken up from the call 4356 // sequence. 
4357 auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>(); 4358 Register SPReg = Regs.getStackPointerRegister(); 4359 Chain = AllocaCall.getValue(1); 4360 SDValue Glue = AllocaCall.getValue(2); 4361 SDValue NewSPRegNode = DAG.getCopyFromReg(Chain, DL, SPReg, PtrVT, Glue); 4362 Chain = NewSPRegNode.getValue(1); 4363 4364 MVT PtrMVT = getPointerMemTy(MF.getDataLayout()); 4365 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, PtrMVT); 4366 SDValue Result = DAG.getNode(ISD::ADD, DL, PtrMVT, NewSPRegNode, ArgAdjust); 4367 4368 // Dynamically realign if needed. 4369 if (ExtraAlignSpace) { 4370 Result = DAG.getNode(ISD::ADD, DL, PtrVT, Result, 4371 DAG.getConstant(ExtraAlignSpace, DL, PtrVT)); 4372 Result = DAG.getNode(ISD::AND, DL, PtrVT, Result, 4373 DAG.getConstant(~(RequiredAlign - 1), DL, PtrVT)); 4374 } 4375 4376 SDValue Ops[2] = {Result, Chain}; 4377 return DAG.getMergeValues(Ops, DL); 4378 } 4379 4380 SDValue 4381 SystemZTargetLowering::lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, 4382 SelectionDAG &DAG) const { 4383 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 4384 MachineFunction &MF = DAG.getMachineFunction(); 4385 bool RealignOpt = !MF.getFunction().hasFnAttribute("no-realign-stack"); 4386 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); 4387 4388 SDValue Chain = Op.getOperand(0); 4389 SDValue Size = Op.getOperand(1); 4390 SDValue Align = Op.getOperand(2); 4391 SDLoc DL(Op); 4392 4393 // If user has set the no alignment function attribute, ignore 4394 // alloca alignments. 4395 uint64_t AlignVal = (RealignOpt ? Align->getAsZExtVal() : 0); 4396 4397 uint64_t StackAlign = TFI->getStackAlignment(); 4398 uint64_t RequiredAlign = std::max(AlignVal, StackAlign); 4399 uint64_t ExtraAlignSpace = RequiredAlign - StackAlign; 4400 4401 Register SPReg = getStackPointerRegisterToSaveRestore(); 4402 SDValue NeededSpace = Size; 4403 4404 // Get a reference to the stack pointer. 4405 SDValue OldSP = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i64); 4406 4407 // If we need a backchain, save it now. 4408 SDValue Backchain; 4409 if (StoreBackchain) 4410 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), 4411 MachinePointerInfo()); 4412 4413 // Add extra space for alignment if needed. 4414 if (ExtraAlignSpace) 4415 NeededSpace = DAG.getNode(ISD::ADD, DL, MVT::i64, NeededSpace, 4416 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 4417 4418 // Get the new stack pointer value. 4419 SDValue NewSP; 4420 if (hasInlineStackProbe(MF)) { 4421 NewSP = DAG.getNode(SystemZISD::PROBED_ALLOCA, DL, 4422 DAG.getVTList(MVT::i64, MVT::Other), Chain, OldSP, NeededSpace); 4423 Chain = NewSP.getValue(1); 4424 } 4425 else { 4426 NewSP = DAG.getNode(ISD::SUB, DL, MVT::i64, OldSP, NeededSpace); 4427 // Copy the new stack pointer back. 4428 Chain = DAG.getCopyToReg(Chain, DL, SPReg, NewSP); 4429 } 4430 4431 // The allocated data lives above the 160 bytes allocated for the standard 4432 // frame, plus any outgoing stack arguments. We don't know how much that 4433 // amounts to yet, so emit a special ADJDYNALLOC placeholder. 4434 SDValue ArgAdjust = DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 4435 SDValue Result = DAG.getNode(ISD::ADD, DL, MVT::i64, NewSP, ArgAdjust); 4436 4437 // Dynamically realign if needed. 
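  // The ExtraAlignSpace bytes added to NeededSpace above guarantee that an
  // address with the required alignment exists within the allocation, so
  // the add-and-mask sequence below cannot run past the end of it.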
4438 if (RequiredAlign > StackAlign) { 4439 Result = 4440 DAG.getNode(ISD::ADD, DL, MVT::i64, Result, 4441 DAG.getConstant(ExtraAlignSpace, DL, MVT::i64)); 4442 Result = 4443 DAG.getNode(ISD::AND, DL, MVT::i64, Result, 4444 DAG.getConstant(~(RequiredAlign - 1), DL, MVT::i64)); 4445 } 4446 4447 if (StoreBackchain) 4448 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), 4449 MachinePointerInfo()); 4450 4451 SDValue Ops[2] = { Result, Chain }; 4452 return DAG.getMergeValues(Ops, DL); 4453 } 4454 4455 SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET( 4456 SDValue Op, SelectionDAG &DAG) const { 4457 SDLoc DL(Op); 4458 4459 return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64); 4460 } 4461 4462 SDValue SystemZTargetLowering::lowerMULH(SDValue Op, 4463 SelectionDAG &DAG, 4464 unsigned Opcode) const { 4465 EVT VT = Op.getValueType(); 4466 SDLoc DL(Op); 4467 SDValue Even, Odd; 4468 4469 // This custom expander is only used on arch15 and later for 64-bit types. 4470 assert(!is32Bit(VT)); 4471 assert(Subtarget.hasMiscellaneousExtensions2()); 4472 4473 // SystemZISD::xMUL_LOHI returns the low result in the odd register and 4474 // the high result in the even register. Return the latter. 4475 lowerGR128Binary(DAG, DL, VT, Opcode, 4476 Op.getOperand(0), Op.getOperand(1), Even, Odd); 4477 return Even; 4478 } 4479 4480 SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op, 4481 SelectionDAG &DAG) const { 4482 EVT VT = Op.getValueType(); 4483 SDLoc DL(Op); 4484 SDValue Ops[2]; 4485 if (is32Bit(VT)) 4486 // Just do a normal 64-bit multiplication and extract the results. 4487 // We define this so that it can be used for constant division. 4488 lowerMUL_LOHI32(DAG, DL, ISD::SIGN_EXTEND, Op.getOperand(0), 4489 Op.getOperand(1), Ops[1], Ops[0]); 4490 else if (Subtarget.hasMiscellaneousExtensions2()) 4491 // SystemZISD::SMUL_LOHI returns the low result in the odd register and 4492 // the high result in the even register. ISD::SMUL_LOHI is defined to 4493 // return the low half first, so the results are in reverse order. 4494 lowerGR128Binary(DAG, DL, VT, SystemZISD::SMUL_LOHI, 4495 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4496 else { 4497 // Do a full 128-bit multiplication based on SystemZISD::UMUL_LOHI: 4498 // 4499 // (ll * rl) + ((lh * rl) << 64) + ((ll * rh) << 64) 4500 // 4501 // but using the fact that the upper halves are either all zeros 4502 // or all ones: 4503 // 4504 // (ll * rl) - ((lh & rl) << 64) - ((ll & rh) << 64) 4505 // 4506 // and grouping the right terms together since they are quicker than the 4507 // multiplication: 4508 // 4509 // (ll * rl) - (((lh & rl) + (ll & rh)) << 64) 4510 SDValue C63 = DAG.getConstant(63, DL, MVT::i64); 4511 SDValue LL = Op.getOperand(0); 4512 SDValue RL = Op.getOperand(1); 4513 SDValue LH = DAG.getNode(ISD::SRA, DL, VT, LL, C63); 4514 SDValue RH = DAG.getNode(ISD::SRA, DL, VT, RL, C63); 4515 // SystemZISD::UMUL_LOHI returns the low result in the odd register and 4516 // the high result in the even register. ISD::SMUL_LOHI is defined to 4517 // return the low half first, so the results are in reverse order. 
4518 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, 4519 LL, RL, Ops[1], Ops[0]); 4520 SDValue NegLLTimesRH = DAG.getNode(ISD::AND, DL, VT, LL, RH); 4521 SDValue NegLHTimesRL = DAG.getNode(ISD::AND, DL, VT, LH, RL); 4522 SDValue NegSum = DAG.getNode(ISD::ADD, DL, VT, NegLLTimesRH, NegLHTimesRL); 4523 Ops[1] = DAG.getNode(ISD::SUB, DL, VT, Ops[1], NegSum); 4524 } 4525 return DAG.getMergeValues(Ops, DL); 4526 } 4527 4528 SDValue SystemZTargetLowering::lowerUMUL_LOHI(SDValue Op, 4529 SelectionDAG &DAG) const { 4530 EVT VT = Op.getValueType(); 4531 SDLoc DL(Op); 4532 SDValue Ops[2]; 4533 if (is32Bit(VT)) 4534 // Just do a normal 64-bit multiplication and extract the results. 4535 // We define this so that it can be used for constant division. 4536 lowerMUL_LOHI32(DAG, DL, ISD::ZERO_EXTEND, Op.getOperand(0), 4537 Op.getOperand(1), Ops[1], Ops[0]); 4538 else 4539 // SystemZISD::UMUL_LOHI returns the low result in the odd register and 4540 // the high result in the even register. ISD::UMUL_LOHI is defined to 4541 // return the low half first, so the results are in reverse order. 4542 lowerGR128Binary(DAG, DL, VT, SystemZISD::UMUL_LOHI, 4543 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4544 return DAG.getMergeValues(Ops, DL); 4545 } 4546 4547 SDValue SystemZTargetLowering::lowerSDIVREM(SDValue Op, 4548 SelectionDAG &DAG) const { 4549 SDValue Op0 = Op.getOperand(0); 4550 SDValue Op1 = Op.getOperand(1); 4551 EVT VT = Op.getValueType(); 4552 SDLoc DL(Op); 4553 4554 // We use DSGF for 32-bit division. This means the first operand must 4555 // always be 64-bit, and the second operand should be 32-bit whenever 4556 // that is possible, to improve performance. 4557 if (is32Bit(VT)) 4558 Op0 = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, Op0); 4559 else if (DAG.ComputeNumSignBits(Op1) > 32) 4560 Op1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Op1); 4561 4562 // DSG(F) returns the remainder in the even register and the 4563 // quotient in the odd register. 4564 SDValue Ops[2]; 4565 lowerGR128Binary(DAG, DL, VT, SystemZISD::SDIVREM, Op0, Op1, Ops[1], Ops[0]); 4566 return DAG.getMergeValues(Ops, DL); 4567 } 4568 4569 SDValue SystemZTargetLowering::lowerUDIVREM(SDValue Op, 4570 SelectionDAG &DAG) const { 4571 EVT VT = Op.getValueType(); 4572 SDLoc DL(Op); 4573 4574 // DL(G) returns the remainder in the even register and the 4575 // quotient in the odd register. 4576 SDValue Ops[2]; 4577 lowerGR128Binary(DAG, DL, VT, SystemZISD::UDIVREM, 4578 Op.getOperand(0), Op.getOperand(1), Ops[1], Ops[0]); 4579 return DAG.getMergeValues(Ops, DL); 4580 } 4581 4582 SDValue SystemZTargetLowering::lowerOR(SDValue Op, SelectionDAG &DAG) const { 4583 assert(Op.getValueType() == MVT::i64 && "Should be 64-bit operation"); 4584 4585 // Get the known-zero masks for each operand. 4586 SDValue Ops[] = {Op.getOperand(0), Op.getOperand(1)}; 4587 KnownBits Known[2] = {DAG.computeKnownBits(Ops[0]), 4588 DAG.computeKnownBits(Ops[1])}; 4589 4590 // See if the upper 32 bits of one operand and the lower 32 bits of the 4591 // other are known zero. They are the low and high operands respectively. 
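// For example, if Op0 is (zext i32 X to i64) and Op1 is (shl Y, 32), then
// Op0's upper 32 bits and Op1's lower 32 bits are known zero, so the OR is
// really "insert X into the low word of Op1" and can become a subreg insert
// (subject to the constant checks below).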
4592 uint64_t Masks[] = { Known[0].Zero.getZExtValue(), 4593 Known[1].Zero.getZExtValue() }; 4594 unsigned High, Low; 4595 if ((Masks[0] >> 32) == 0xffffffff && uint32_t(Masks[1]) == 0xffffffff) 4596 High = 1, Low = 0; 4597 else if ((Masks[1] >> 32) == 0xffffffff && uint32_t(Masks[0]) == 0xffffffff) 4598 High = 0, Low = 1; 4599 else 4600 return Op; 4601 4602 SDValue LowOp = Ops[Low]; 4603 SDValue HighOp = Ops[High]; 4604 4605 // If the high part is a constant, we're better off using IILH. 4606 if (HighOp.getOpcode() == ISD::Constant) 4607 return Op; 4608 4609 // If the low part is a constant that is outside the range of LHI, 4610 // then we're better off using IILF. 4611 if (LowOp.getOpcode() == ISD::Constant) { 4612 int64_t Value = int32_t(LowOp->getAsZExtVal()); 4613 if (!isInt<16>(Value)) 4614 return Op; 4615 } 4616 4617 // Check whether the high part is an AND that doesn't change the 4618 // high 32 bits and just masks out low bits. We can skip it if so. 4619 if (HighOp.getOpcode() == ISD::AND && 4620 HighOp.getOperand(1).getOpcode() == ISD::Constant) { 4621 SDValue HighOp0 = HighOp.getOperand(0); 4622 uint64_t Mask = HighOp.getConstantOperandVal(1); 4623 if (DAG.MaskedValueIsZero(HighOp0, APInt(64, ~(Mask | 0xffffffff)))) 4624 HighOp = HighOp0; 4625 } 4626 4627 // Take advantage of the fact that all GR32 operations only change the 4628 // low 32 bits by truncating Low to an i32 and inserting it directly 4629 // using a subreg. The interesting cases are those where the truncation 4630 // can be folded. 4631 SDLoc DL(Op); 4632 SDValue Low32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, LowOp); 4633 return DAG.getTargetInsertSubreg(SystemZ::subreg_l32, DL, 4634 MVT::i64, HighOp, Low32); 4635 } 4636 4637 // Lower SADDO/SSUBO/UADDO/USUBO nodes. 4638 SDValue SystemZTargetLowering::lowerXALUO(SDValue Op, 4639 SelectionDAG &DAG) const { 4640 SDNode *N = Op.getNode(); 4641 SDValue LHS = N->getOperand(0); 4642 SDValue RHS = N->getOperand(1); 4643 SDLoc DL(N); 4644 4645 if (N->getValueType(0) == MVT::i128) { 4646 unsigned BaseOp = 0; 4647 unsigned FlagOp = 0; 4648 bool IsBorrow = false; 4649 switch (Op.getOpcode()) { 4650 default: llvm_unreachable("Unknown instruction!"); 4651 case ISD::UADDO: 4652 BaseOp = ISD::ADD; 4653 FlagOp = SystemZISD::VACC; 4654 break; 4655 case ISD::USUBO: 4656 BaseOp = ISD::SUB; 4657 FlagOp = SystemZISD::VSCBI; 4658 IsBorrow = true; 4659 break; 4660 } 4661 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS); 4662 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS); 4663 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, 4664 DAG.getValueType(MVT::i1)); 4665 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); 4666 if (IsBorrow) 4667 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), 4668 Flag, DAG.getConstant(1, DL, Flag.getValueType())); 4669 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); 4670 } 4671 4672 unsigned BaseOp = 0; 4673 unsigned CCValid = 0; 4674 unsigned CCMask = 0; 4675 4676 switch (Op.getOpcode()) { 4677 default: llvm_unreachable("Unknown instruction!"); 4678 case ISD::SADDO: 4679 BaseOp = SystemZISD::SADDO; 4680 CCValid = SystemZ::CCMASK_ARITH; 4681 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; 4682 break; 4683 case ISD::SSUBO: 4684 BaseOp = SystemZISD::SSUBO; 4685 CCValid = SystemZ::CCMASK_ARITH; 4686 CCMask = SystemZ::CCMASK_ARITH_OVERFLOW; 4687 break; 4688 case ISD::UADDO: 4689 BaseOp = SystemZISD::UADDO; 4690 CCValid = SystemZ::CCMASK_LOGICAL; 4691 CCMask = SystemZ::CCMASK_LOGICAL_CARRY; 4692 break; 
4693 case ISD::USUBO: 4694 BaseOp = SystemZISD::USUBO; 4695 CCValid = SystemZ::CCMASK_LOGICAL; 4696 CCMask = SystemZ::CCMASK_LOGICAL_BORROW; 4697 break; 4698 } 4699 4700 SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32); 4701 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); 4702 4703 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); 4704 if (N->getValueType(1) == MVT::i1) 4705 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); 4706 4707 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); 4708 } 4709 4710 static bool isAddCarryChain(SDValue Carry) { 4711 while (Carry.getOpcode() == ISD::UADDO_CARRY && 4712 Carry->getValueType(0) != MVT::i128) 4713 Carry = Carry.getOperand(2); 4714 return Carry.getOpcode() == ISD::UADDO && 4715 Carry->getValueType(0) != MVT::i128; 4716 } 4717 4718 static bool isSubBorrowChain(SDValue Carry) { 4719 while (Carry.getOpcode() == ISD::USUBO_CARRY && 4720 Carry->getValueType(0) != MVT::i128) 4721 Carry = Carry.getOperand(2); 4722 return Carry.getOpcode() == ISD::USUBO && 4723 Carry->getValueType(0) != MVT::i128; 4724 } 4725 4726 // Lower UADDO_CARRY/USUBO_CARRY nodes. 4727 SDValue SystemZTargetLowering::lowerUADDSUBO_CARRY(SDValue Op, 4728 SelectionDAG &DAG) const { 4729 4730 SDNode *N = Op.getNode(); 4731 MVT VT = N->getSimpleValueType(0); 4732 4733 // Let legalize expand this if it isn't a legal type yet. 4734 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 4735 return SDValue(); 4736 4737 SDValue LHS = N->getOperand(0); 4738 SDValue RHS = N->getOperand(1); 4739 SDValue Carry = Op.getOperand(2); 4740 SDLoc DL(N); 4741 4742 if (VT == MVT::i128) { 4743 unsigned BaseOp = 0; 4744 unsigned FlagOp = 0; 4745 bool IsBorrow = false; 4746 switch (Op.getOpcode()) { 4747 default: llvm_unreachable("Unknown instruction!"); 4748 case ISD::UADDO_CARRY: 4749 BaseOp = SystemZISD::VAC; 4750 FlagOp = SystemZISD::VACCC; 4751 break; 4752 case ISD::USUBO_CARRY: 4753 BaseOp = SystemZISD::VSBI; 4754 FlagOp = SystemZISD::VSBCBI; 4755 IsBorrow = true; 4756 break; 4757 } 4758 if (IsBorrow) 4759 Carry = DAG.getNode(ISD::XOR, DL, Carry.getValueType(), 4760 Carry, DAG.getConstant(1, DL, Carry.getValueType())); 4761 Carry = DAG.getZExtOrTrunc(Carry, DL, MVT::i128); 4762 SDValue Result = DAG.getNode(BaseOp, DL, MVT::i128, LHS, RHS, Carry); 4763 SDValue Flag = DAG.getNode(FlagOp, DL, MVT::i128, LHS, RHS, Carry); 4764 Flag = DAG.getNode(ISD::AssertZext, DL, MVT::i128, Flag, 4765 DAG.getValueType(MVT::i1)); 4766 Flag = DAG.getZExtOrTrunc(Flag, DL, N->getValueType(1)); 4767 if (IsBorrow) 4768 Flag = DAG.getNode(ISD::XOR, DL, Flag.getValueType(), 4769 Flag, DAG.getConstant(1, DL, Flag.getValueType())); 4770 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Flag); 4771 } 4772 4773 unsigned BaseOp = 0; 4774 unsigned CCValid = 0; 4775 unsigned CCMask = 0; 4776 4777 switch (Op.getOpcode()) { 4778 default: llvm_unreachable("Unknown instruction!"); 4779 case ISD::UADDO_CARRY: 4780 if (!isAddCarryChain(Carry)) 4781 return SDValue(); 4782 4783 BaseOp = SystemZISD::ADDCARRY; 4784 CCValid = SystemZ::CCMASK_LOGICAL; 4785 CCMask = SystemZ::CCMASK_LOGICAL_CARRY; 4786 break; 4787 case ISD::USUBO_CARRY: 4788 if (!isSubBorrowChain(Carry)) 4789 return SDValue(); 4790 4791 BaseOp = SystemZISD::SUBCARRY; 4792 CCValid = SystemZ::CCMASK_LOGICAL; 4793 CCMask = SystemZ::CCMASK_LOGICAL_BORROW; 4794 break; 4795 } 4796 4797 // Set the condition code from the carry flag. 
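// Note (illustrative): isAddCarryChain/isSubBorrowChain above ensure that
// Carry was itself produced from CC by an earlier UADDO/USUBO-style node in
// the chain, so wrapping it in GET_CCMASK lets later DAG combines reconnect
// ADDCARRY/SUBCARRY directly to that CC producer instead of re-testing a
// materialized boolean.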
4798 Carry = DAG.getNode(SystemZISD::GET_CCMASK, DL, MVT::i32, Carry, 4799 DAG.getConstant(CCValid, DL, MVT::i32), 4800 DAG.getConstant(CCMask, DL, MVT::i32)); 4801 4802 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 4803 SDValue Result = DAG.getNode(BaseOp, DL, VTs, LHS, RHS, Carry); 4804 4805 SDValue SetCC = emitSETCC(DAG, DL, Result.getValue(1), CCValid, CCMask); 4806 if (N->getValueType(1) == MVT::i1) 4807 SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); 4808 4809 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, SetCC); 4810 } 4811 4812 SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, 4813 SelectionDAG &DAG) const { 4814 EVT VT = Op.getValueType(); 4815 SDLoc DL(Op); 4816 Op = Op.getOperand(0); 4817 4818 if (VT.getScalarSizeInBits() == 128) { 4819 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op); 4820 Op = DAG.getNode(ISD::CTPOP, DL, MVT::v2i64, Op); 4821 SDValue Tmp = DAG.getSplatBuildVector(MVT::v2i64, DL, 4822 DAG.getConstant(0, DL, MVT::i64)); 4823 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4824 return Op; 4825 } 4826 4827 // Handle vector types via VPOPCT. 4828 if (VT.isVector()) { 4829 Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); 4830 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); 4831 switch (VT.getScalarSizeInBits()) { 4832 case 8: 4833 break; 4834 case 16: { 4835 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 4836 SDValue Shift = DAG.getConstant(8, DL, MVT::i32); 4837 SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); 4838 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 4839 Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); 4840 break; 4841 } 4842 case 32: { 4843 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, 4844 DAG.getConstant(0, DL, MVT::i32)); 4845 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4846 break; 4847 } 4848 case 64: { 4849 SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, 4850 DAG.getConstant(0, DL, MVT::i32)); 4851 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); 4852 Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); 4853 break; 4854 } 4855 default: 4856 llvm_unreachable("Unexpected type"); 4857 } 4858 return Op; 4859 } 4860 4861 // Get the known-zero mask for the operand. 4862 KnownBits Known = DAG.computeKnownBits(Op); 4863 unsigned NumSignificantBits = Known.getMaxValue().getActiveBits(); 4864 if (NumSignificantBits == 0) 4865 return DAG.getConstant(0, DL, VT); 4866 4867 // Skip known-zero high parts of the operand. 4868 int64_t OrigBitSize = VT.getSizeInBits(); 4869 int64_t BitSize = llvm::bit_ceil(NumSignificantBits); 4870 BitSize = std::min(BitSize, OrigBitSize); 4871 4872 // The POPCNT instruction counts the number of bits in each byte. 4873 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); 4874 Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); 4875 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 4876 4877 // Add up per-byte counts in a binary tree. All bits of Op at 4878 // position larger than BitSize remain zero throughout. 4879 for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { 4880 SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, DL, VT)); 4881 if (BitSize != OrigBitSize) 4882 Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, 4883 DAG.getConstant(((uint64_t)1 << BitSize) - 1, DL, VT)); 4884 Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); 4885 } 4886 4887 // Extract overall result from high byte. 
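// Worked example (illustrative): for VT = i32 and a runtime value of
// 0x80000003, POPCNT leaves the per-byte counts 0x01000002; the loop above
// then adds Op << 16 (giving 0x01020002) and Op << 8 (giving 0x03020202),
// so the total population count 3 ends up in the top byte, which the shift
// below extracts. The per-byte sums cannot overflow, since at most eight
// counts of at most 8 each are accumulated in any byte.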
4888 if (BitSize > 8) 4889 Op = DAG.getNode(ISD::SRL, DL, VT, Op, 4890 DAG.getConstant(BitSize - 8, DL, VT)); 4891 4892 return Op; 4893 } 4894 4895 SDValue SystemZTargetLowering::lowerATOMIC_FENCE(SDValue Op, 4896 SelectionDAG &DAG) const { 4897 SDLoc DL(Op); 4898 AtomicOrdering FenceOrdering = 4899 static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); 4900 SyncScope::ID FenceSSID = 4901 static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); 4902 4903 // The only fence that needs an instruction is a sequentially-consistent 4904 // cross-thread fence. 4905 if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && 4906 FenceSSID == SyncScope::System) { 4907 return SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, MVT::Other, 4908 Op.getOperand(0)), 4909 0); 4910 } 4911 4912 // MEMBARRIER is a compiler barrier; it codegens to a no-op. 4913 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0)); 4914 } 4915 4916 SDValue SystemZTargetLowering::lowerATOMIC_LDST_I128(SDValue Op, 4917 SelectionDAG &DAG) const { 4918 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4919 assert( 4920 (Node->getMemoryVT() == MVT::i128 || Node->getMemoryVT() == MVT::f128) && 4921 "Only custom lowering i128 or f128."); 4922 // Use same code to handle both legal and non-legal i128 types. 4923 SmallVector<SDValue, 2> Results; 4924 LowerOperationWrapper(Node, Results, DAG); 4925 return DAG.getMergeValues(Results, SDLoc(Op)); 4926 } 4927 4928 // Prepare for a Compare And Swap for a subword operation. This needs to be 4929 // done in memory with 4 bytes at natural alignment. 4930 static void getCSAddressAndShifts(SDValue Addr, SelectionDAG &DAG, SDLoc DL, 4931 SDValue &AlignedAddr, SDValue &BitShift, 4932 SDValue &NegBitShift) { 4933 EVT PtrVT = Addr.getValueType(); 4934 EVT WideVT = MVT::i32; 4935 4936 // Get the address of the containing word. 4937 AlignedAddr = DAG.getNode(ISD::AND, DL, PtrVT, Addr, 4938 DAG.getSignedConstant(-4, DL, PtrVT)); 4939 4940 // Get the number of bits that the word must be rotated left in order 4941 // to bring the field to the top bits of a GR32. 4942 BitShift = DAG.getNode(ISD::SHL, DL, PtrVT, Addr, 4943 DAG.getConstant(3, DL, PtrVT)); 4944 BitShift = DAG.getNode(ISD::TRUNCATE, DL, WideVT, BitShift); 4945 4946 // Get the complementing shift amount, for rotating a field in the top 4947 // bits back to its proper position. 4948 NegBitShift = DAG.getNode(ISD::SUB, DL, WideVT, 4949 DAG.getConstant(0, DL, WideVT), BitShift); 4950 4951 } 4952 4953 // Op is an 8-, 16-bit or 32-bit ATOMIC_LOAD_* operation. Lower the first 4954 // two into the fullword ATOMIC_LOADW_* operation given by Opcode. 4955 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_OP(SDValue Op, 4956 SelectionDAG &DAG, 4957 unsigned Opcode) const { 4958 auto *Node = cast<AtomicSDNode>(Op.getNode()); 4959 4960 // 32-bit operations need no special handling. 4961 EVT NarrowVT = Node->getMemoryVT(); 4962 EVT WideVT = MVT::i32; 4963 if (NarrowVT == WideVT) 4964 return Op; 4965 4966 int64_t BitSize = NarrowVT.getSizeInBits(); 4967 SDValue ChainIn = Node->getChain(); 4968 SDValue Addr = Node->getBasePtr(); 4969 SDValue Src2 = Node->getVal(); 4970 MachineMemOperand *MMO = Node->getMemOperand(); 4971 SDLoc DL(Node); 4972 4973 // Convert atomic subtracts of constants into additions. 
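// For example, an 8-bit "atomicrmw sub ..., 1" is rewritten here as an
// ATOMIC_LOADW_ADD of -1, so no separate subword subtract operation is
// needed.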
4974 if (Opcode == SystemZISD::ATOMIC_LOADW_SUB) 4975 if (auto *Const = dyn_cast<ConstantSDNode>(Src2)) { 4976 Opcode = SystemZISD::ATOMIC_LOADW_ADD; 4977 Src2 = DAG.getSignedConstant(-Const->getSExtValue(), DL, 4978 Src2.getValueType()); 4979 } 4980 4981 SDValue AlignedAddr, BitShift, NegBitShift; 4982 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); 4983 4984 // Extend the source operand to 32 bits and prepare it for the inner loop. 4985 // ATOMIC_SWAPW uses RISBG to rotate the field left, but all other 4986 // operations require the source to be shifted in advance. (This shift 4987 // can be folded if the source is constant.) For AND and NAND, the lower 4988 // bits must be set, while for other opcodes they should be left clear. 4989 if (Opcode != SystemZISD::ATOMIC_SWAPW) 4990 Src2 = DAG.getNode(ISD::SHL, DL, WideVT, Src2, 4991 DAG.getConstant(32 - BitSize, DL, WideVT)); 4992 if (Opcode == SystemZISD::ATOMIC_LOADW_AND || 4993 Opcode == SystemZISD::ATOMIC_LOADW_NAND) 4994 Src2 = DAG.getNode(ISD::OR, DL, WideVT, Src2, 4995 DAG.getConstant(uint32_t(-1) >> BitSize, DL, WideVT)); 4996 4997 // Construct the ATOMIC_LOADW_* node. 4998 SDVTList VTList = DAG.getVTList(WideVT, MVT::Other); 4999 SDValue Ops[] = { ChainIn, AlignedAddr, Src2, BitShift, NegBitShift, 5000 DAG.getConstant(BitSize, DL, WideVT) }; 5001 SDValue AtomicOp = DAG.getMemIntrinsicNode(Opcode, DL, VTList, Ops, 5002 NarrowVT, MMO); 5003 5004 // Rotate the result of the final CS so that the field is in the lower 5005 // bits of a GR32, then truncate it. 5006 SDValue ResultShift = DAG.getNode(ISD::ADD, DL, WideVT, BitShift, 5007 DAG.getConstant(BitSize, DL, WideVT)); 5008 SDValue Result = DAG.getNode(ISD::ROTL, DL, WideVT, AtomicOp, ResultShift); 5009 5010 SDValue RetOps[2] = { Result, AtomicOp.getValue(1) }; 5011 return DAG.getMergeValues(RetOps, DL); 5012 } 5013 5014 // Op is an ATOMIC_LOAD_SUB operation. Lower 8- and 16-bit operations into 5015 // ATOMIC_LOADW_SUBs and convert 32- and 64-bit operations into additions. 5016 SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, 5017 SelectionDAG &DAG) const { 5018 auto *Node = cast<AtomicSDNode>(Op.getNode()); 5019 EVT MemVT = Node->getMemoryVT(); 5020 if (MemVT == MVT::i32 || MemVT == MVT::i64) { 5021 // A full-width operation: negate and use LAA(G). 5022 assert(Op.getValueType() == MemVT && "Mismatched VTs"); 5023 assert(Subtarget.hasInterlockedAccess1() && 5024 "Should have been expanded by AtomicExpand pass."); 5025 SDValue Src2 = Node->getVal(); 5026 SDLoc DL(Src2); 5027 SDValue NegSrc2 = 5028 DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, DL, MemVT), Src2); 5029 return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT, 5030 Node->getChain(), Node->getBasePtr(), NegSrc2, 5031 Node->getMemOperand()); 5032 } 5033 5034 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_SUB); 5035 } 5036 5037 // Lower 8/16/32/64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS node. 5038 SDValue SystemZTargetLowering::lowerATOMIC_CMP_SWAP(SDValue Op, 5039 SelectionDAG &DAG) const { 5040 auto *Node = cast<AtomicSDNode>(Op.getNode()); 5041 SDValue ChainIn = Node->getOperand(0); 5042 SDValue Addr = Node->getOperand(1); 5043 SDValue CmpVal = Node->getOperand(2); 5044 SDValue SwapVal = Node->getOperand(3); 5045 MachineMemOperand *MMO = Node->getMemOperand(); 5046 SDLoc DL(Node); 5047 5048 if (Node->getMemoryVT() == MVT::i128) { 5049 // Use same code to handle both legal and non-legal i128 types. 
5050 SmallVector<SDValue, 3> Results; 5051 LowerOperationWrapper(Node, Results, DAG); 5052 return DAG.getMergeValues(Results, DL); 5053 } 5054 5055 // We have native support for 32-bit and 64-bit compare and swap, but we 5056 // still need to expand extracting the "success" result from the CC. 5057 EVT NarrowVT = Node->getMemoryVT(); 5058 EVT WideVT = NarrowVT == MVT::i64 ? MVT::i64 : MVT::i32; 5059 if (NarrowVT == WideVT) { 5060 SDVTList Tys = DAG.getVTList(WideVT, MVT::i32, MVT::Other); 5061 SDValue Ops[] = { ChainIn, Addr, CmpVal, SwapVal }; 5062 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP, 5063 DL, Tys, Ops, NarrowVT, MMO); 5064 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), 5065 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); 5066 5067 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), AtomicOp.getValue(0)); 5068 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); 5069 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); 5070 return SDValue(); 5071 } 5072 5073 // Convert 8-bit and 16-bit compare and swap to a loop, implemented 5074 // via a fullword ATOMIC_CMP_SWAPW operation. 5075 int64_t BitSize = NarrowVT.getSizeInBits(); 5076 5077 SDValue AlignedAddr, BitShift, NegBitShift; 5078 getCSAddressAndShifts(Addr, DAG, DL, AlignedAddr, BitShift, NegBitShift); 5079 5080 // Construct the ATOMIC_CMP_SWAPW node. 5081 SDVTList VTList = DAG.getVTList(WideVT, MVT::i32, MVT::Other); 5082 SDValue Ops[] = { ChainIn, AlignedAddr, CmpVal, SwapVal, BitShift, 5083 NegBitShift, DAG.getConstant(BitSize, DL, WideVT) }; 5084 SDValue AtomicOp = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAPW, DL, 5085 VTList, Ops, NarrowVT, MMO); 5086 SDValue Success = emitSETCC(DAG, DL, AtomicOp.getValue(1), 5087 SystemZ::CCMASK_ICMP, SystemZ::CCMASK_CMP_EQ); 5088 5089 // emitAtomicCmpSwapW() will zero extend the result (original value). 5090 SDValue OrigVal = DAG.getNode(ISD::AssertZext, DL, WideVT, AtomicOp.getValue(0), 5091 DAG.getValueType(NarrowVT)); 5092 DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), OrigVal); 5093 DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); 5094 DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), AtomicOp.getValue(2)); 5095 return SDValue(); 5096 } 5097 5098 MachineMemOperand::Flags 5099 SystemZTargetLowering::getTargetMMOFlags(const Instruction &I) const { 5100 // Because of how we convert atomic_load and atomic_store to normal loads and 5101 // stores in the DAG, we need to ensure that the MMOs are marked volatile 5102 // since DAGCombine hasn't been updated to account for atomic, but non 5103 // volatile loads. 
(See D57601) 5104 if (auto *SI = dyn_cast<StoreInst>(&I)) 5105 if (SI->isAtomic()) 5106 return MachineMemOperand::MOVolatile; 5107 if (auto *LI = dyn_cast<LoadInst>(&I)) 5108 if (LI->isAtomic()) 5109 return MachineMemOperand::MOVolatile; 5110 if (auto *AI = dyn_cast<AtomicRMWInst>(&I)) 5111 if (AI->isAtomic()) 5112 return MachineMemOperand::MOVolatile; 5113 if (auto *AI = dyn_cast<AtomicCmpXchgInst>(&I)) 5114 if (AI->isAtomic()) 5115 return MachineMemOperand::MOVolatile; 5116 return MachineMemOperand::MONone; 5117 } 5118 5119 SDValue SystemZTargetLowering::lowerSTACKSAVE(SDValue Op, 5120 SelectionDAG &DAG) const { 5121 MachineFunction &MF = DAG.getMachineFunction(); 5122 auto *Regs = Subtarget.getSpecialRegisters(); 5123 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 5124 report_fatal_error("Variable-sized stack allocations are not supported " 5125 "in GHC calling convention"); 5126 return DAG.getCopyFromReg(Op.getOperand(0), SDLoc(Op), 5127 Regs->getStackPointerRegister(), Op.getValueType()); 5128 } 5129 5130 SDValue SystemZTargetLowering::lowerSTACKRESTORE(SDValue Op, 5131 SelectionDAG &DAG) const { 5132 MachineFunction &MF = DAG.getMachineFunction(); 5133 auto *Regs = Subtarget.getSpecialRegisters(); 5134 bool StoreBackchain = MF.getSubtarget<SystemZSubtarget>().hasBackChain(); 5135 5136 if (MF.getFunction().getCallingConv() == CallingConv::GHC) 5137 report_fatal_error("Variable-sized stack allocations are not supported " 5138 "in GHC calling convention"); 5139 5140 SDValue Chain = Op.getOperand(0); 5141 SDValue NewSP = Op.getOperand(1); 5142 SDValue Backchain; 5143 SDLoc DL(Op); 5144 5145 if (StoreBackchain) { 5146 SDValue OldSP = DAG.getCopyFromReg( 5147 Chain, DL, Regs->getStackPointerRegister(), MVT::i64); 5148 Backchain = DAG.getLoad(MVT::i64, DL, Chain, getBackchainAddress(OldSP, DAG), 5149 MachinePointerInfo()); 5150 } 5151 5152 Chain = DAG.getCopyToReg(Chain, DL, Regs->getStackPointerRegister(), NewSP); 5153 5154 if (StoreBackchain) 5155 Chain = DAG.getStore(Chain, DL, Backchain, getBackchainAddress(NewSP, DAG), 5156 MachinePointerInfo()); 5157 5158 return Chain; 5159 } 5160 5161 SDValue SystemZTargetLowering::lowerPREFETCH(SDValue Op, 5162 SelectionDAG &DAG) const { 5163 bool IsData = Op.getConstantOperandVal(4); 5164 if (!IsData) 5165 // Just preserve the chain. 5166 return Op.getOperand(0); 5167 5168 SDLoc DL(Op); 5169 bool IsWrite = Op.getConstantOperandVal(2); 5170 unsigned Code = IsWrite ? SystemZ::PFD_WRITE : SystemZ::PFD_READ; 5171 auto *Node = cast<MemIntrinsicSDNode>(Op.getNode()); 5172 SDValue Ops[] = {Op.getOperand(0), DAG.getTargetConstant(Code, DL, MVT::i32), 5173 Op.getOperand(1)}; 5174 return DAG.getMemIntrinsicNode(SystemZISD::PREFETCH, DL, 5175 Node->getVTList(), Ops, 5176 Node->getMemoryVT(), Node->getMemOperand()); 5177 } 5178 5179 // Convert condition code in CCReg to an i32 value. 
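// (IPM inserts the condition code and program mask into the high byte of the
// 32-bit result, with the CC starting at bit SystemZ::IPM_CC; the shift
// right below therefore leaves the raw CC value 0-3 in the low two bits.)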
5180 static SDValue getCCResult(SelectionDAG &DAG, SDValue CCReg) { 5181 SDLoc DL(CCReg); 5182 SDValue IPM = DAG.getNode(SystemZISD::IPM, DL, MVT::i32, CCReg); 5183 return DAG.getNode(ISD::SRL, DL, MVT::i32, IPM, 5184 DAG.getConstant(SystemZ::IPM_CC, DL, MVT::i32)); 5185 } 5186 5187 SDValue 5188 SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, 5189 SelectionDAG &DAG) const { 5190 unsigned Opcode, CCValid; 5191 if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { 5192 assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); 5193 SDNode *Node = emitIntrinsicWithCCAndChain(DAG, Op, Opcode); 5194 SDValue CC = getCCResult(DAG, SDValue(Node, 0)); 5195 DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); 5196 return SDValue(); 5197 } 5198 5199 return SDValue(); 5200 } 5201 5202 SDValue 5203 SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, 5204 SelectionDAG &DAG) const { 5205 unsigned Opcode, CCValid; 5206 if (isIntrinsicWithCC(Op, Opcode, CCValid)) { 5207 SDNode *Node = emitIntrinsicWithCC(DAG, Op, Opcode); 5208 if (Op->getNumValues() == 1) 5209 return getCCResult(DAG, SDValue(Node, 0)); 5210 assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); 5211 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), 5212 SDValue(Node, 0), getCCResult(DAG, SDValue(Node, 1))); 5213 } 5214 5215 unsigned Id = Op.getConstantOperandVal(0); 5216 switch (Id) { 5217 case Intrinsic::thread_pointer: 5218 return lowerThreadPointer(SDLoc(Op), DAG); 5219 5220 case Intrinsic::s390_vpdi: 5221 return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), 5222 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 5223 5224 case Intrinsic::s390_vperm: 5225 return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), 5226 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 5227 5228 case Intrinsic::s390_vuphb: 5229 case Intrinsic::s390_vuphh: 5230 case Intrinsic::s390_vuphf: 5231 case Intrinsic::s390_vuphg: 5232 return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), 5233 Op.getOperand(1)); 5234 5235 case Intrinsic::s390_vuplhb: 5236 case Intrinsic::s390_vuplhh: 5237 case Intrinsic::s390_vuplhf: 5238 case Intrinsic::s390_vuplhg: 5239 return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), 5240 Op.getOperand(1)); 5241 5242 case Intrinsic::s390_vuplb: 5243 case Intrinsic::s390_vuplhw: 5244 case Intrinsic::s390_vuplf: 5245 case Intrinsic::s390_vuplg: 5246 return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), 5247 Op.getOperand(1)); 5248 5249 case Intrinsic::s390_vupllb: 5250 case Intrinsic::s390_vupllh: 5251 case Intrinsic::s390_vupllf: 5252 case Intrinsic::s390_vupllg: 5253 return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), 5254 Op.getOperand(1)); 5255 5256 case Intrinsic::s390_vsumb: 5257 case Intrinsic::s390_vsumh: 5258 case Intrinsic::s390_vsumgh: 5259 case Intrinsic::s390_vsumgf: 5260 case Intrinsic::s390_vsumqf: 5261 case Intrinsic::s390_vsumqg: 5262 return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), 5263 Op.getOperand(1), Op.getOperand(2)); 5264 5265 case Intrinsic::s390_vaq: 5266 return DAG.getNode(ISD::ADD, SDLoc(Op), Op.getValueType(), 5267 Op.getOperand(1), Op.getOperand(2)); 5268 case Intrinsic::s390_vaccb: 5269 case Intrinsic::s390_vacch: 5270 case Intrinsic::s390_vaccf: 5271 case Intrinsic::s390_vaccg: 5272 case Intrinsic::s390_vaccq: 5273 return DAG.getNode(SystemZISD::VACC, SDLoc(Op), Op.getValueType(), 5274 Op.getOperand(1), 
Op.getOperand(2)); 5275 case Intrinsic::s390_vacq: 5276 return DAG.getNode(SystemZISD::VAC, SDLoc(Op), Op.getValueType(), 5277 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 5278 case Intrinsic::s390_vacccq: 5279 return DAG.getNode(SystemZISD::VACCC, SDLoc(Op), Op.getValueType(), 5280 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 5281 5282 case Intrinsic::s390_vsq: 5283 return DAG.getNode(ISD::SUB, SDLoc(Op), Op.getValueType(), 5284 Op.getOperand(1), Op.getOperand(2)); 5285 case Intrinsic::s390_vscbib: 5286 case Intrinsic::s390_vscbih: 5287 case Intrinsic::s390_vscbif: 5288 case Intrinsic::s390_vscbig: 5289 case Intrinsic::s390_vscbiq: 5290 return DAG.getNode(SystemZISD::VSCBI, SDLoc(Op), Op.getValueType(), 5291 Op.getOperand(1), Op.getOperand(2)); 5292 case Intrinsic::s390_vsbiq: 5293 return DAG.getNode(SystemZISD::VSBI, SDLoc(Op), Op.getValueType(), 5294 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 5295 case Intrinsic::s390_vsbcbiq: 5296 return DAG.getNode(SystemZISD::VSBCBI, SDLoc(Op), Op.getValueType(), 5297 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); 5298 } 5299 5300 return SDValue(); 5301 } 5302 5303 namespace { 5304 // Says that SystemZISD operation Opcode can be used to perform the equivalent 5305 // of a VPERM with permute vector Bytes. If Opcode takes three operands, 5306 // Operand is the constant third operand, otherwise it is the number of 5307 // bytes in each element of the result. 5308 struct Permute { 5309 unsigned Opcode; 5310 unsigned Operand; 5311 unsigned char Bytes[SystemZ::VectorBytes]; 5312 }; 5313 } 5314 5315 static const Permute PermuteForms[] = { 5316 // VMRHG 5317 { SystemZISD::MERGE_HIGH, 8, 5318 { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, 5319 // VMRHF 5320 { SystemZISD::MERGE_HIGH, 4, 5321 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, 5322 // VMRHH 5323 { SystemZISD::MERGE_HIGH, 2, 5324 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, 5325 // VMRHB 5326 { SystemZISD::MERGE_HIGH, 1, 5327 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, 5328 // VMRLG 5329 { SystemZISD::MERGE_LOW, 8, 5330 { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, 5331 // VMRLF 5332 { SystemZISD::MERGE_LOW, 4, 5333 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, 5334 // VMRLH 5335 { SystemZISD::MERGE_LOW, 2, 5336 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, 5337 // VMRLB 5338 { SystemZISD::MERGE_LOW, 1, 5339 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, 5340 // VPKG 5341 { SystemZISD::PACK, 4, 5342 { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, 5343 // VPKF 5344 { SystemZISD::PACK, 2, 5345 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, 5346 // VPKH 5347 { SystemZISD::PACK, 1, 5348 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, 5349 // VPDI V1, V2, 4 (low half of V1, high half of V2) 5350 { SystemZISD::PERMUTE_DWORDS, 4, 5351 { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, 5352 // VPDI V1, V2, 1 (high half of V1, low half of V2) 5353 { SystemZISD::PERMUTE_DWORDS, 1, 5354 { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } 5355 }; 5356 5357 // Called after matching a vector shuffle against a particular pattern. 5358 // Both the original shuffle and the pattern have two vector operands. 
5359 // OpNos[0] is the operand of the original shuffle that should be used for 5360 // operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. 5361 // OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and 5362 // set OpNo0 and OpNo1 to the shuffle operands that should actually be used 5363 // for operands 0 and 1 of the pattern. 5364 static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { 5365 if (OpNos[0] < 0) { 5366 if (OpNos[1] < 0) 5367 return false; 5368 OpNo0 = OpNo1 = OpNos[1]; 5369 } else if (OpNos[1] < 0) { 5370 OpNo0 = OpNo1 = OpNos[0]; 5371 } else { 5372 OpNo0 = OpNos[0]; 5373 OpNo1 = OpNos[1]; 5374 } 5375 return true; 5376 } 5377 5378 // Bytes is a VPERM-like permute vector, except that -1 is used for 5379 // undefined bytes. Return true if the VPERM can be implemented using P. 5380 // When returning true set OpNo0 to the VPERM operand that should be 5381 // used for operand 0 of P and likewise OpNo1 for operand 1 of P. 5382 // 5383 // For example, if swapping the VPERM operands allows P to match, OpNo0 5384 // will be 1 and OpNo1 will be 0. If instead Bytes only refers to one 5385 // operand, but rewriting it to use two duplicated operands allows it to 5386 // match P, then OpNo0 and OpNo1 will be the same. 5387 static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P, 5388 unsigned &OpNo0, unsigned &OpNo1) { 5389 int OpNos[] = { -1, -1 }; 5390 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5391 int Elt = Bytes[I]; 5392 if (Elt >= 0) { 5393 // Make sure that the two permute vectors use the same suboperand 5394 // byte number. Only the operand numbers (the high bits) are 5395 // allowed to differ. 5396 if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) 5397 return false; 5398 int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; 5399 int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; 5400 // Make sure that the operand mappings are consistent with previous 5401 // elements. 5402 if (OpNos[ModelOpNo] == 1 - RealOpNo) 5403 return false; 5404 OpNos[ModelOpNo] = RealOpNo; 5405 } 5406 } 5407 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 5408 } 5409 5410 // As above, but search for a matching permute. 5411 static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes, 5412 unsigned &OpNo0, unsigned &OpNo1) { 5413 for (auto &P : PermuteForms) 5414 if (matchPermute(Bytes, P, OpNo0, OpNo1)) 5415 return &P; 5416 return nullptr; 5417 } 5418 5419 // Bytes is a VPERM-like permute vector, except that -1 is used for 5420 // undefined bytes. This permute is an operand of an outer permute. 5421 // See whether redistributing the -1 bytes gives a shuffle that can be 5422 // implemented using P. If so, set Transform to a VPERM-like permute vector 5423 // that, when applied to the result of P, gives the original permute in Bytes. 5424 static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes, 5425 const Permute &P, 5426 SmallVectorImpl<int> &Transform) { 5427 unsigned To = 0; 5428 for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { 5429 int Elt = Bytes[From]; 5430 if (Elt < 0) 5431 // Byte number From of the result is undefined. 5432 Transform[From] = -1; 5433 else { 5434 while (P.Bytes[To] != Elt) { 5435 To += 1; 5436 if (To == SystemZ::VectorBytes) 5437 return false; 5438 } 5439 Transform[From] = To; 5440 } 5441 } 5442 return true; 5443 } 5444 5445 // As above, but search for a matching permute. 
5446 static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes, 5447 SmallVectorImpl<int> &Transform) { 5448 for (auto &P : PermuteForms) 5449 if (matchDoublePermute(Bytes, P, Transform)) 5450 return &P; 5451 return nullptr; 5452 } 5453 5454 // Convert the mask of the given shuffle op into a byte-level mask, 5455 // as if it had type vNi8. 5456 static bool getVPermMask(SDValue ShuffleOp, 5457 SmallVectorImpl<int> &Bytes) { 5458 EVT VT = ShuffleOp.getValueType(); 5459 unsigned NumElements = VT.getVectorNumElements(); 5460 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5461 5462 if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(ShuffleOp)) { 5463 Bytes.resize(NumElements * BytesPerElement, -1); 5464 for (unsigned I = 0; I < NumElements; ++I) { 5465 int Index = VSN->getMaskElt(I); 5466 if (Index >= 0) 5467 for (unsigned J = 0; J < BytesPerElement; ++J) 5468 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 5469 } 5470 return true; 5471 } 5472 if (SystemZISD::SPLAT == ShuffleOp.getOpcode() && 5473 isa<ConstantSDNode>(ShuffleOp.getOperand(1))) { 5474 unsigned Index = ShuffleOp.getConstantOperandVal(1); 5475 Bytes.resize(NumElements * BytesPerElement, -1); 5476 for (unsigned I = 0; I < NumElements; ++I) 5477 for (unsigned J = 0; J < BytesPerElement; ++J) 5478 Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; 5479 return true; 5480 } 5481 return false; 5482 } 5483 5484 // Bytes is a VPERM-like permute vector, except that -1 is used for 5485 // undefined bytes. See whether bytes [Start, Start + BytesPerElement) of 5486 // the result come from a contiguous sequence of bytes from one input. 5487 // Set Base to the selector for the first byte if so. 5488 static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start, 5489 unsigned BytesPerElement, int &Base) { 5490 Base = -1; 5491 for (unsigned I = 0; I < BytesPerElement; ++I) { 5492 if (Bytes[Start + I] >= 0) { 5493 unsigned Elem = Bytes[Start + I]; 5494 if (Base < 0) { 5495 Base = Elem - I; 5496 // Make sure the bytes would come from one input operand. 5497 if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) 5498 return false; 5499 } else if (unsigned(Base) != Elem - I) 5500 return false; 5501 } 5502 } 5503 return true; 5504 } 5505 5506 // Bytes is a VPERM-like permute vector, except that -1 is used for 5507 // undefined bytes. Return true if it can be performed using VSLDB. 5508 // When returning true, set StartIndex to the shift amount and OpNo0 5509 // and OpNo1 to the VPERM operands that should be used as the first 5510 // and second shift operand respectively. 5511 static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes, 5512 unsigned &StartIndex, unsigned &OpNo0, 5513 unsigned &OpNo1) { 5514 int OpNos[] = { -1, -1 }; 5515 int Shift = -1; 5516 for (unsigned I = 0; I < 16; ++I) { 5517 int Index = Bytes[I]; 5518 if (Index >= 0) { 5519 int ExpectedShift = (Index - I) % SystemZ::VectorBytes; 5520 int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; 5521 int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; 5522 if (Shift < 0) 5523 Shift = ExpectedShift; 5524 else if (Shift != ExpectedShift) 5525 return false; 5526 // Make sure that the operand mappings are consistent with previous 5527 // elements. 
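// (OpNos[ModelOpNo] == 1 - RealOpNo means this pattern operand was already
// bound to the *other* shuffle operand, so the candidate mapping would be
// inconsistent; an unassigned entry is -1 and never matches.)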
5528 if (OpNos[ModelOpNo] == 1 - RealOpNo) 5529 return false; 5530 OpNos[ModelOpNo] = RealOpNo; 5531 } 5532 } 5533 StartIndex = Shift; 5534 return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); 5535 } 5536 5537 // Create a node that performs P on operands Op0 and Op1, casting the 5538 // operands to the appropriate type. The type of the result is determined by P. 5539 static SDValue getPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 5540 const Permute &P, SDValue Op0, SDValue Op1) { 5541 // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input 5542 // elements of a PACK are twice as wide as the outputs. 5543 unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : 5544 P.Opcode == SystemZISD::PACK ? P.Operand * 2 : 5545 P.Operand); 5546 // Cast both operands to the appropriate type. 5547 MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), 5548 SystemZ::VectorBytes / InBytes); 5549 Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); 5550 Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); 5551 SDValue Op; 5552 if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { 5553 SDValue Op2 = DAG.getTargetConstant(P.Operand, DL, MVT::i32); 5554 Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); 5555 } else if (P.Opcode == SystemZISD::PACK) { 5556 MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), 5557 SystemZ::VectorBytes / P.Operand); 5558 Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); 5559 } else { 5560 Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); 5561 } 5562 return Op; 5563 } 5564 5565 static bool isZeroVector(SDValue N) { 5566 if (N->getOpcode() == ISD::BITCAST) 5567 N = N->getOperand(0); 5568 if (N->getOpcode() == ISD::SPLAT_VECTOR) 5569 if (auto *Op = dyn_cast<ConstantSDNode>(N->getOperand(0))) 5570 return Op->getZExtValue() == 0; 5571 return ISD::isBuildVectorAllZeros(N.getNode()); 5572 } 5573 5574 // Return the index of the zero/undef vector, or UINT32_MAX if not found. 5575 static uint32_t findZeroVectorIdx(SDValue *Ops, unsigned Num) { 5576 for (unsigned I = 0; I < Num ; I++) 5577 if (isZeroVector(Ops[I])) 5578 return I; 5579 return UINT32_MAX; 5580 } 5581 5582 // Bytes is a VPERM-like permute vector, except that -1 is used for 5583 // undefined bytes. Implement it on operands Ops[0] and Ops[1] using 5584 // VSLDB or VPERM. 5585 static SDValue getGeneralPermuteNode(SelectionDAG &DAG, const SDLoc &DL, 5586 SDValue *Ops, 5587 const SmallVectorImpl<int> &Bytes) { 5588 for (unsigned I = 0; I < 2; ++I) 5589 Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); 5590 5591 // First see whether VSLDB can be used. 5592 unsigned StartIndex, OpNo0, OpNo1; 5593 if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) 5594 return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], 5595 Ops[OpNo1], 5596 DAG.getTargetConstant(StartIndex, DL, MVT::i32)); 5597 5598 // Fall back on VPERM. Construct an SDNode for the permute vector. Try to 5599 // eliminate a zero vector by reusing any zero index in the permute vector. 5600 unsigned ZeroVecIdx = findZeroVectorIdx(&Ops[0], 2); 5601 if (ZeroVecIdx != UINT32_MAX) { 5602 bool MaskFirst = true; 5603 int ZeroIdx = -1; 5604 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5605 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5606 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; 5607 if (OpNo == ZeroVecIdx && I == 0) { 5608 // If the first byte is zero, use mask as first operand. 
5609 ZeroIdx = 0; 5610 break; 5611 } 5612 if (OpNo != ZeroVecIdx && Byte == 0) { 5613 // If mask contains a zero, use it by placing that vector first. 5614 ZeroIdx = I + SystemZ::VectorBytes; 5615 MaskFirst = false; 5616 break; 5617 } 5618 } 5619 if (ZeroIdx != -1) { 5620 SDValue IndexNodes[SystemZ::VectorBytes]; 5621 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { 5622 if (Bytes[I] >= 0) { 5623 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5624 unsigned Byte = unsigned(Bytes[I]) % SystemZ::VectorBytes; 5625 if (OpNo == ZeroVecIdx) 5626 IndexNodes[I] = DAG.getConstant(ZeroIdx, DL, MVT::i32); 5627 else { 5628 unsigned BIdx = MaskFirst ? Byte + SystemZ::VectorBytes : Byte; 5629 IndexNodes[I] = DAG.getConstant(BIdx, DL, MVT::i32); 5630 } 5631 } else 5632 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 5633 } 5634 SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 5635 SDValue Src = ZeroVecIdx == 0 ? Ops[1] : Ops[0]; 5636 if (MaskFirst) 5637 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Mask, Src, 5638 Mask); 5639 else 5640 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Src, Mask, 5641 Mask); 5642 } 5643 } 5644 5645 SDValue IndexNodes[SystemZ::VectorBytes]; 5646 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5647 if (Bytes[I] >= 0) 5648 IndexNodes[I] = DAG.getConstant(Bytes[I], DL, MVT::i32); 5649 else 5650 IndexNodes[I] = DAG.getUNDEF(MVT::i32); 5651 SDValue Op2 = DAG.getBuildVector(MVT::v16i8, DL, IndexNodes); 5652 return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], 5653 (!Ops[1].isUndef() ? Ops[1] : Ops[0]), Op2); 5654 } 5655 5656 namespace { 5657 // Describes a general N-operand vector shuffle. 5658 struct GeneralShuffle { 5659 GeneralShuffle(EVT vt) : VT(vt), UnpackFromEltSize(UINT_MAX) {} 5660 void addUndef(); 5661 bool add(SDValue, unsigned); 5662 SDValue getNode(SelectionDAG &, const SDLoc &); 5663 void tryPrepareForUnpack(); 5664 bool unpackWasPrepared() { return UnpackFromEltSize <= 4; } 5665 SDValue insertUnpackIfPrepared(SelectionDAG &DAG, const SDLoc &DL, SDValue Op); 5666 5667 // The operands of the shuffle. 5668 SmallVector<SDValue, SystemZ::VectorBytes> Ops; 5669 5670 // Index I is -1 if byte I of the result is undefined. Otherwise the 5671 // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand 5672 // Bytes[I] / SystemZ::VectorBytes. 5673 SmallVector<int, SystemZ::VectorBytes> Bytes; 5674 5675 // The type of the shuffle result. 5676 EVT VT; 5677 5678 // Holds a value of 1, 2 or 4 if a final unpack has been prepared for. 5679 unsigned UnpackFromEltSize; 5680 }; 5681 } 5682 5683 // Add an extra undefined element to the shuffle. 5684 void GeneralShuffle::addUndef() { 5685 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5686 for (unsigned I = 0; I < BytesPerElement; ++I) 5687 Bytes.push_back(-1); 5688 } 5689 5690 // Add an extra element to the shuffle, taking it from element Elem of Op. 5691 // A null Op indicates a vector input whose value will be calculated later; 5692 // there is at most one such input per shuffle and it always has the same 5693 // type as the result. Aborts and returns false if the source vector elements 5694 // of an EXTRACT_VECTOR_ELT are smaller than the destination elements. Per 5695 // LLVM they become implicitly extended, but this is rare and not optimized. 
5696 bool GeneralShuffle::add(SDValue Op, unsigned Elem) { 5697 unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); 5698 5699 // The source vector can have wider elements than the result, 5700 // either through an explicit TRUNCATE or because of type legalization. 5701 // We want the least significant part. 5702 EVT FromVT = Op.getNode() ? Op.getValueType() : VT; 5703 unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); 5704 5705 // Return false if the source elements are smaller than their destination 5706 // elements. 5707 if (FromBytesPerElement < BytesPerElement) 5708 return false; 5709 5710 unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + 5711 (FromBytesPerElement - BytesPerElement)); 5712 5713 // Look through things like shuffles and bitcasts. 5714 while (Op.getNode()) { 5715 if (Op.getOpcode() == ISD::BITCAST) 5716 Op = Op.getOperand(0); 5717 else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { 5718 // See whether the bytes we need come from a contiguous part of one 5719 // operand. 5720 SmallVector<int, SystemZ::VectorBytes> OpBytes; 5721 if (!getVPermMask(Op, OpBytes)) 5722 break; 5723 int NewByte; 5724 if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) 5725 break; 5726 if (NewByte < 0) { 5727 addUndef(); 5728 return true; 5729 } 5730 Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); 5731 Byte = unsigned(NewByte) % SystemZ::VectorBytes; 5732 } else if (Op.isUndef()) { 5733 addUndef(); 5734 return true; 5735 } else 5736 break; 5737 } 5738 5739 // Make sure that the source of the extraction is in Ops. 5740 unsigned OpNo = 0; 5741 for (; OpNo < Ops.size(); ++OpNo) 5742 if (Ops[OpNo] == Op) 5743 break; 5744 if (OpNo == Ops.size()) 5745 Ops.push_back(Op); 5746 5747 // Add the element to Bytes. 5748 unsigned Base = OpNo * SystemZ::VectorBytes + Byte; 5749 for (unsigned I = 0; I < BytesPerElement; ++I) 5750 Bytes.push_back(Base + I); 5751 5752 return true; 5753 } 5754 5755 // Return SDNodes for the completed shuffle. 5756 SDValue GeneralShuffle::getNode(SelectionDAG &DAG, const SDLoc &DL) { 5757 assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); 5758 5759 if (Ops.size() == 0) 5760 return DAG.getUNDEF(VT); 5761 5762 // Use a single unpack if possible as the last operation. 5763 tryPrepareForUnpack(); 5764 5765 // Make sure that there are at least two shuffle operands. 5766 if (Ops.size() == 1) 5767 Ops.push_back(DAG.getUNDEF(MVT::v16i8)); 5768 5769 // Create a tree of shuffles, deferring root node until after the loop. 5770 // Try to redistribute the undefined elements of non-root nodes so that 5771 // the non-root shuffles match something like a pack or merge, then adjust 5772 // the parent node's permute vector to compensate for the new order. 5773 // Among other things, this copes with vectors like <2 x i16> that were 5774 // padded with undefined elements during type legalization. 5775 // 5776 // In the best case this redistribution will lead to the whole tree 5777 // using packs and merges. It should rarely be a loss in other cases. 5778 unsigned Stride = 1; 5779 for (; Stride * 2 < Ops.size(); Stride *= 2) { 5780 for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { 5781 SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; 5782 5783 // Create a mask for just these two operands. 
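// In this two-operand mask, bytes taken from Ops[I] keep indices 0..15,
// bytes taken from Ops[I + Stride] become 16..31, and everything else is
// undef (-1). With four operands, for instance, the first pass combines
// Ops[0]/Ops[1] and Ops[2]/Ops[3], and the final two-operand shuffle is
// emitted after the loop.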
5784 SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes); 5785 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { 5786 unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; 5787 unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; 5788 if (OpNo == I) 5789 NewBytes[J] = Byte; 5790 else if (OpNo == I + Stride) 5791 NewBytes[J] = SystemZ::VectorBytes + Byte; 5792 else 5793 NewBytes[J] = -1; 5794 } 5795 // See if it would be better to reorganize NewMask to avoid using VPERM. 5796 SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes); 5797 if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { 5798 Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); 5799 // Applying NewBytesMap to Ops[I] gets back to NewBytes. 5800 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { 5801 if (NewBytes[J] >= 0) { 5802 assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && 5803 "Invalid double permute"); 5804 Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; 5805 } else 5806 assert(NewBytesMap[J] < 0 && "Invalid double permute"); 5807 } 5808 } else { 5809 // Just use NewBytes on the operands. 5810 Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); 5811 for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) 5812 if (NewBytes[J] >= 0) 5813 Bytes[J] = I * SystemZ::VectorBytes + J; 5814 } 5815 } 5816 } 5817 5818 // Now we just have 2 inputs. Put the second operand in Ops[1]. 5819 if (Stride > 1) { 5820 Ops[1] = Ops[Stride]; 5821 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5822 if (Bytes[I] >= int(SystemZ::VectorBytes)) 5823 Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; 5824 } 5825 5826 // Look for an instruction that can do the permute without resorting 5827 // to VPERM. 5828 unsigned OpNo0, OpNo1; 5829 SDValue Op; 5830 if (unpackWasPrepared() && Ops[1].isUndef()) 5831 Op = Ops[0]; 5832 else if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) 5833 Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); 5834 else 5835 Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); 5836 5837 Op = insertUnpackIfPrepared(DAG, DL, Op); 5838 5839 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 5840 } 5841 5842 #ifndef NDEBUG 5843 static void dumpBytes(const SmallVectorImpl<int> &Bytes, std::string Msg) { 5844 dbgs() << Msg.c_str() << " { "; 5845 for (unsigned i = 0; i < Bytes.size(); i++) 5846 dbgs() << Bytes[i] << " "; 5847 dbgs() << "}\n"; 5848 } 5849 #endif 5850 5851 // If the Bytes vector matches an unpack operation, prepare to do the unpack 5852 // after all else by removing the zero vector and the effect of the unpack on 5853 // Bytes. 5854 void GeneralShuffle::tryPrepareForUnpack() { 5855 uint32_t ZeroVecOpNo = findZeroVectorIdx(&Ops[0], Ops.size()); 5856 if (ZeroVecOpNo == UINT32_MAX || Ops.size() == 1) 5857 return; 5858 5859 // Only do this if removing the zero vector reduces the depth, otherwise 5860 // the critical path will increase with the final unpack. 5861 if (Ops.size() > 2 && 5862 Log2_32_Ceil(Ops.size()) == Log2_32_Ceil(Ops.size() - 1)) 5863 return; 5864 5865 // Find an unpack that would allow removing the zero vector from Ops. 
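// An unpack doubles the element size and, for the logical form used here,
// zero-fills the first UnpackFromEltSize bytes of each widened element. So
// if, for some element size, every such byte of the result is drawn from the
// zero vector and the remaining bytes come from the other operands, the zero
// vector can be dropped and a single UNPACKL_HIGH appended at the end (see
// insertUnpackIfPrepared).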
5866 UnpackFromEltSize = 1; 5867 for (; UnpackFromEltSize <= 4; UnpackFromEltSize *= 2) { 5868 bool MatchUnpack = true; 5869 SmallVector<int, SystemZ::VectorBytes> SrcBytes; 5870 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes; Elt++) { 5871 unsigned ToEltSize = UnpackFromEltSize * 2; 5872 bool IsZextByte = (Elt % ToEltSize) < UnpackFromEltSize; 5873 if (!IsZextByte) 5874 SrcBytes.push_back(Bytes[Elt]); 5875 if (Bytes[Elt] != -1) { 5876 unsigned OpNo = unsigned(Bytes[Elt]) / SystemZ::VectorBytes; 5877 if (IsZextByte != (OpNo == ZeroVecOpNo)) { 5878 MatchUnpack = false; 5879 break; 5880 } 5881 } 5882 } 5883 if (MatchUnpack) { 5884 if (Ops.size() == 2) { 5885 // Don't use unpack if a single source operand needs rearrangement. 5886 for (unsigned i = 0; i < SystemZ::VectorBytes / 2; i++) 5887 if (SrcBytes[i] != -1 && SrcBytes[i] % 16 != int(i)) { 5888 UnpackFromEltSize = UINT_MAX; 5889 return; 5890 } 5891 } 5892 break; 5893 } 5894 } 5895 if (UnpackFromEltSize > 4) 5896 return; 5897 5898 LLVM_DEBUG(dbgs() << "Preparing for final unpack of element size " 5899 << UnpackFromEltSize << ". Zero vector is Op#" << ZeroVecOpNo 5900 << ".\n"; 5901 dumpBytes(Bytes, "Original Bytes vector:");); 5902 5903 // Apply the unpack in reverse to the Bytes array. 5904 unsigned B = 0; 5905 for (unsigned Elt = 0; Elt < SystemZ::VectorBytes;) { 5906 Elt += UnpackFromEltSize; 5907 for (unsigned i = 0; i < UnpackFromEltSize; i++, Elt++, B++) 5908 Bytes[B] = Bytes[Elt]; 5909 } 5910 while (B < SystemZ::VectorBytes) 5911 Bytes[B++] = -1; 5912 5913 // Remove the zero vector from Ops 5914 Ops.erase(&Ops[ZeroVecOpNo]); 5915 for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) 5916 if (Bytes[I] >= 0) { 5917 unsigned OpNo = unsigned(Bytes[I]) / SystemZ::VectorBytes; 5918 if (OpNo > ZeroVecOpNo) 5919 Bytes[I] -= SystemZ::VectorBytes; 5920 } 5921 5922 LLVM_DEBUG(dumpBytes(Bytes, "Resulting Bytes vector, zero vector removed:"); 5923 dbgs() << "\n";); 5924 } 5925 5926 SDValue GeneralShuffle::insertUnpackIfPrepared(SelectionDAG &DAG, 5927 const SDLoc &DL, 5928 SDValue Op) { 5929 if (!unpackWasPrepared()) 5930 return Op; 5931 unsigned InBits = UnpackFromEltSize * 8; 5932 EVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBits), 5933 SystemZ::VectorBits / InBits); 5934 SDValue PackedOp = DAG.getNode(ISD::BITCAST, DL, InVT, Op); 5935 unsigned OutBits = InBits * 2; 5936 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(OutBits), 5937 SystemZ::VectorBits / OutBits); 5938 return DAG.getNode(SystemZISD::UNPACKL_HIGH, DL, OutVT, PackedOp); 5939 } 5940 5941 // Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. 5942 static bool isScalarToVector(SDValue Op) { 5943 for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) 5944 if (!Op.getOperand(I).isUndef()) 5945 return false; 5946 return true; 5947 } 5948 5949 // Return a vector of type VT that contains Value in the first element. 5950 // The other elements don't matter. 5951 static SDValue buildScalarToVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5952 SDValue Value) { 5953 // If we have a constant, replicate it to all elements and let the 5954 // BUILD_VECTOR lowering take care of it. 
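// (A replicated constant can typically be matched to a single instruction,
// e.g. VREPI or VGBM, by the constant BUILD_VECTOR path, which is usually
// cheaper than inserting the scalar into a vector register.)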
5955 if (Value.getOpcode() == ISD::Constant || 5956 Value.getOpcode() == ISD::ConstantFP) { 5957 SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value); 5958 return DAG.getBuildVector(VT, DL, Ops); 5959 } 5960 if (Value.isUndef()) 5961 return DAG.getUNDEF(VT); 5962 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); 5963 } 5964 5965 // Return a vector of type VT in which Op0 is in element 0 and Op1 is in 5966 // element 1. Used for cases in which replication is cheap. 5967 static SDValue buildMergeScalars(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 5968 SDValue Op0, SDValue Op1) { 5969 if (Op0.isUndef()) { 5970 if (Op1.isUndef()) 5971 return DAG.getUNDEF(VT); 5972 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); 5973 } 5974 if (Op1.isUndef()) 5975 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); 5976 return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, 5977 buildScalarToVector(DAG, DL, VT, Op0), 5978 buildScalarToVector(DAG, DL, VT, Op1)); 5979 } 5980 5981 // Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 5982 // vector for them. 5983 static SDValue joinDwords(SelectionDAG &DAG, const SDLoc &DL, SDValue Op0, 5984 SDValue Op1) { 5985 if (Op0.isUndef() && Op1.isUndef()) 5986 return DAG.getUNDEF(MVT::v2i64); 5987 // If one of the two inputs is undefined then replicate the other one, 5988 // in order to avoid using another register unnecessarily. 5989 if (Op0.isUndef()) 5990 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 5991 else if (Op1.isUndef()) 5992 Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5993 else { 5994 Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); 5995 Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); 5996 } 5997 return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); 5998 } 5999 6000 // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually 6001 // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for 6002 // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR 6003 // would benefit from this representation and return it if so. 6004 static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, 6005 BuildVectorSDNode *BVN) { 6006 EVT VT = BVN->getValueType(0); 6007 unsigned NumElements = VT.getVectorNumElements(); 6008 6009 // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation 6010 // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still 6011 // need a BUILD_VECTOR, add an additional placeholder operand for that 6012 // BUILD_VECTOR and store its operands in ResidueOps. 6013 GeneralShuffle GS(VT); 6014 SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps; 6015 bool FoundOne = false; 6016 for (unsigned I = 0; I < NumElements; ++I) { 6017 SDValue Op = BVN->getOperand(I); 6018 if (Op.getOpcode() == ISD::TRUNCATE) 6019 Op = Op.getOperand(0); 6020 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 6021 Op.getOperand(1).getOpcode() == ISD::Constant) { 6022 unsigned Elem = Op.getConstantOperandVal(1); 6023 if (!GS.add(Op.getOperand(0), Elem)) 6024 return SDValue(); 6025 FoundOne = true; 6026 } else if (Op.isUndef()) { 6027 GS.addUndef(); 6028 } else { 6029 if (!GS.add(SDValue(), ResidueOps.size())) 6030 return SDValue(); 6031 ResidueOps.push_back(BVN->getOperand(I)); 6032 } 6033 } 6034 6035 // Nothing to do if there are no EXTRACT_VECTOR_ELTs. 6036 if (!FoundOne) 6037 return SDValue(); 6038 6039 // Create the BUILD_VECTOR for the remaining elements, if any. 
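// For example, for <4 x i32> { extract(A, 0), extract(B, 2), X, Y }: A and B
// become shuffle operands, X and Y are collected in ResidueOps, and GS holds
// a single null placeholder operand (referenced as its elements 0 and 1)
// that the loop below replaces with a BUILD_VECTOR of the residue.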
6040 if (!ResidueOps.empty()) { 6041 while (ResidueOps.size() < NumElements) 6042 ResidueOps.push_back(DAG.getUNDEF(ResidueOps[0].getValueType())); 6043 for (auto &Op : GS.Ops) { 6044 if (!Op.getNode()) { 6045 Op = DAG.getBuildVector(VT, SDLoc(BVN), ResidueOps); 6046 break; 6047 } 6048 } 6049 } 6050 return GS.getNode(DAG, SDLoc(BVN)); 6051 } 6052 6053 bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const { 6054 if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed()) 6055 return true; 6056 if (auto *AL = dyn_cast<AtomicSDNode>(Op)) 6057 if (AL->getOpcode() == ISD::ATOMIC_LOAD) 6058 return true; 6059 if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV) 6060 return true; 6061 return false; 6062 } 6063 6064 // Combine GPR scalar values Elems into a vector of type VT. 6065 SDValue 6066 SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT, 6067 SmallVectorImpl<SDValue> &Elems) const { 6068 // See whether there is a single replicated value. 6069 SDValue Single; 6070 unsigned int NumElements = Elems.size(); 6071 unsigned int Count = 0; 6072 for (auto Elem : Elems) { 6073 if (!Elem.isUndef()) { 6074 if (!Single.getNode()) 6075 Single = Elem; 6076 else if (Elem != Single) { 6077 Single = SDValue(); 6078 break; 6079 } 6080 Count += 1; 6081 } 6082 } 6083 // There are three cases here: 6084 // 6085 // - if the only defined element is a loaded one, the best sequence 6086 // is a replicating load. 6087 // 6088 // - otherwise, if the only defined element is an i64 value, we will 6089 // end up with the same VLVGP sequence regardless of whether we short-cut 6090 // for replication or fall through to the later code. 6091 // 6092 // - otherwise, if the only defined element is an i32 or smaller value, 6093 // we would need 2 instructions to replicate it: VLVGP followed by VREPx. 6094 // This is only a win if the single defined element is used more than once. 6095 // In other cases we're better off using a single VLVGx. 6096 if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single))) 6097 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); 6098 6099 // If all elements are loads, use VLREP/VLEs (below). 6100 bool AllLoads = true; 6101 for (auto Elem : Elems) 6102 if (!isVectorElementLoad(Elem)) { 6103 AllLoads = false; 6104 break; 6105 } 6106 6107 // The best way of building a v2i64 from two i64s is to use VLVGP. 6108 if (VT == MVT::v2i64 && !AllLoads) 6109 return joinDwords(DAG, DL, Elems[0], Elems[1]); 6110 6111 // Use a 64-bit merge high to combine two doubles. 6112 if (VT == MVT::v2f64 && !AllLoads) 6113 return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 6114 6115 // Build v4f32 values directly from the FPRs: 6116 // 6117 // <Axxx> <Bxxx> <Cxxxx> <Dxxx> 6118 // V V VMRHF 6119 // <ABxx> <CDxx> 6120 // V VMRHG 6121 // <ABCD> 6122 if (VT == MVT::v4f32 && !AllLoads) { 6123 SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); 6124 SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); 6125 // Avoid unnecessary undefs by reusing the other operand. 6126 if (Op01.isUndef()) 6127 Op01 = Op23; 6128 else if (Op23.isUndef()) 6129 Op23 = Op01; 6130 // Merging identical replications is a no-op. 
6131 if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) 6132 return Op01; 6133 Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); 6134 Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); 6135 SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, 6136 DL, MVT::v2i64, Op01, Op23); 6137 return DAG.getNode(ISD::BITCAST, DL, VT, Op); 6138 } 6139 6140 // Collect the constant terms. 6141 SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue()); 6142 SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false); 6143 6144 unsigned NumConstants = 0; 6145 for (unsigned I = 0; I < NumElements; ++I) { 6146 SDValue Elem = Elems[I]; 6147 if (Elem.getOpcode() == ISD::Constant || 6148 Elem.getOpcode() == ISD::ConstantFP) { 6149 NumConstants += 1; 6150 Constants[I] = Elem; 6151 Done[I] = true; 6152 } 6153 } 6154 // If there was at least one constant, fill in the other elements of 6155 // Constants with undefs to get a full vector constant and use that 6156 // as the starting point. 6157 SDValue Result; 6158 SDValue ReplicatedVal; 6159 if (NumConstants > 0) { 6160 for (unsigned I = 0; I < NumElements; ++I) 6161 if (!Constants[I].getNode()) 6162 Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); 6163 Result = DAG.getBuildVector(VT, DL, Constants); 6164 } else { 6165 // Otherwise try to use VLREP or VLVGP to start the sequence in order to 6166 // avoid a false dependency on any previous contents of the vector 6167 // register. 6168 6169 // Use a VLREP if at least one element is a load. Make sure to replicate 6170 // the load with the most elements having its value. 6171 std::map<const SDNode*, unsigned> UseCounts; 6172 SDNode *LoadMaxUses = nullptr; 6173 for (unsigned I = 0; I < NumElements; ++I) 6174 if (isVectorElementLoad(Elems[I])) { 6175 SDNode *Ld = Elems[I].getNode(); 6176 UseCounts[Ld]++; 6177 if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld]) 6178 LoadMaxUses = Ld; 6179 } 6180 if (LoadMaxUses != nullptr) { 6181 ReplicatedVal = SDValue(LoadMaxUses, 0); 6182 Result = DAG.getNode(SystemZISD::REPLICATE, DL, VT, ReplicatedVal); 6183 } else { 6184 // Try to use VLVGP. 6185 unsigned I1 = NumElements / 2 - 1; 6186 unsigned I2 = NumElements - 1; 6187 bool Def1 = !Elems[I1].isUndef(); 6188 bool Def2 = !Elems[I2].isUndef(); 6189 if (Def1 || Def2) { 6190 SDValue Elem1 = Elems[Def1 ? I1 : I2]; 6191 SDValue Elem2 = Elems[Def2 ? I2 : I1]; 6192 Result = DAG.getNode(ISD::BITCAST, DL, VT, 6193 joinDwords(DAG, DL, Elem1, Elem2)); 6194 Done[I1] = true; 6195 Done[I2] = true; 6196 } else 6197 Result = DAG.getUNDEF(VT); 6198 } 6199 } 6200 6201 // Use VLVGx to insert the other elements. 6202 for (unsigned I = 0; I < NumElements; ++I) 6203 if (!Done[I] && !Elems[I].isUndef() && Elems[I] != ReplicatedVal) 6204 Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], 6205 DAG.getConstant(I, DL, MVT::i32)); 6206 return Result; 6207 } 6208 6209 SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, 6210 SelectionDAG &DAG) const { 6211 auto *BVN = cast<BuildVectorSDNode>(Op.getNode()); 6212 SDLoc DL(Op); 6213 EVT VT = Op.getValueType(); 6214 6215 if (BVN->isConstant()) { 6216 if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) 6217 return Op; 6218 6219 // Fall back to loading it from memory. 6220 return SDValue(); 6221 } 6222 6223 // See if we should use shuffles to construct the vector from other vectors. 6224 if (SDValue Res = tryBuildVectorShuffle(DAG, BVN)) 6225 return Res; 6226 6227 // Detect SCALAR_TO_VECTOR conversions. 
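  // (That is, a BUILD_VECTOR in which every element other than element 0
  // is undef; see isScalarToVector above.)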
6228 if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) 6229 return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); 6230 6231 // Otherwise use buildVector to build the vector up from GPRs. 6232 unsigned NumElements = Op.getNumOperands(); 6233 SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements); 6234 for (unsigned I = 0; I < NumElements; ++I) 6235 Ops[I] = Op.getOperand(I); 6236 return buildVector(DAG, DL, VT, Ops); 6237 } 6238 6239 SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, 6240 SelectionDAG &DAG) const { 6241 auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode()); 6242 SDLoc DL(Op); 6243 EVT VT = Op.getValueType(); 6244 unsigned NumElements = VT.getVectorNumElements(); 6245 6246 if (VSN->isSplat()) { 6247 SDValue Op0 = Op.getOperand(0); 6248 unsigned Index = VSN->getSplatIndex(); 6249 assert(Index < VT.getVectorNumElements() && 6250 "Splat index should be defined and in first operand"); 6251 // See whether the value we're splatting is directly available as a scalar. 6252 if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || 6253 Op0.getOpcode() == ISD::BUILD_VECTOR) 6254 return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); 6255 // Otherwise keep it as a vector-to-vector operation. 6256 return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), 6257 DAG.getTargetConstant(Index, DL, MVT::i32)); 6258 } 6259 6260 GeneralShuffle GS(VT); 6261 for (unsigned I = 0; I < NumElements; ++I) { 6262 int Elt = VSN->getMaskElt(I); 6263 if (Elt < 0) 6264 GS.addUndef(); 6265 else if (!GS.add(Op.getOperand(unsigned(Elt) / NumElements), 6266 unsigned(Elt) % NumElements)) 6267 return SDValue(); 6268 } 6269 return GS.getNode(DAG, SDLoc(VSN)); 6270 } 6271 6272 SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, 6273 SelectionDAG &DAG) const { 6274 SDLoc DL(Op); 6275 // Just insert the scalar into element 0 of an undefined vector. 6276 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, 6277 Op.getValueType(), DAG.getUNDEF(Op.getValueType()), 6278 Op.getOperand(0), DAG.getConstant(0, DL, MVT::i32)); 6279 } 6280 6281 SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, 6282 SelectionDAG &DAG) const { 6283 // Handle insertions of floating-point values. 6284 SDLoc DL(Op); 6285 SDValue Op0 = Op.getOperand(0); 6286 SDValue Op1 = Op.getOperand(1); 6287 SDValue Op2 = Op.getOperand(2); 6288 EVT VT = Op.getValueType(); 6289 6290 // Insertions into constant indices of a v2f64 can be done using VPDI. 6291 // However, if the inserted value is a bitcast or a constant then it's 6292 // better to use GPRs, as below. 6293 if (VT == MVT::v2f64 && 6294 Op1.getOpcode() != ISD::BITCAST && 6295 Op1.getOpcode() != ISD::ConstantFP && 6296 Op2.getOpcode() == ISD::Constant) { 6297 uint64_t Index = Op2->getAsZExtVal(); 6298 unsigned Mask = VT.getVectorNumElements() - 1; 6299 if (Index <= Mask) 6300 return Op; 6301 } 6302 6303 // Otherwise bitcast to the equivalent integer form and insert via a GPR. 6304 MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits()); 6305 MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); 6306 SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, 6307 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), 6308 DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); 6309 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 6310 } 6311 6312 SDValue 6313 SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, 6314 SelectionDAG &DAG) const { 6315 // Handle extractions of floating-point values. 
6316 SDLoc DL(Op); 6317 SDValue Op0 = Op.getOperand(0); 6318 SDValue Op1 = Op.getOperand(1); 6319 EVT VT = Op.getValueType(); 6320 EVT VecVT = Op0.getValueType(); 6321 6322 // Extractions of constant indices can be done directly. 6323 if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) { 6324 uint64_t Index = CIndexN->getZExtValue(); 6325 unsigned Mask = VecVT.getVectorNumElements() - 1; 6326 if (Index <= Mask) 6327 return Op; 6328 } 6329 6330 // Otherwise bitcast to the equivalent integer form and extract via a GPR. 6331 MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); 6332 MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); 6333 SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, 6334 DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); 6335 return DAG.getNode(ISD::BITCAST, DL, VT, Res); 6336 } 6337 6338 SDValue SystemZTargetLowering:: 6339 lowerSIGN_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { 6340 SDValue PackedOp = Op.getOperand(0); 6341 EVT OutVT = Op.getValueType(); 6342 EVT InVT = PackedOp.getValueType(); 6343 unsigned ToBits = OutVT.getScalarSizeInBits(); 6344 unsigned FromBits = InVT.getScalarSizeInBits(); 6345 do { 6346 FromBits *= 2; 6347 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), 6348 SystemZ::VectorBits / FromBits); 6349 PackedOp = 6350 DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(PackedOp), OutVT, PackedOp); 6351 } while (FromBits != ToBits); 6352 return PackedOp; 6353 } 6354 6355 // Lower a ZERO_EXTEND_VECTOR_INREG to a vector shuffle with a zero vector. 6356 SDValue SystemZTargetLowering:: 6357 lowerZERO_EXTEND_VECTOR_INREG(SDValue Op, SelectionDAG &DAG) const { 6358 SDValue PackedOp = Op.getOperand(0); 6359 SDLoc DL(Op); 6360 EVT OutVT = Op.getValueType(); 6361 EVT InVT = PackedOp.getValueType(); 6362 unsigned InNumElts = InVT.getVectorNumElements(); 6363 unsigned OutNumElts = OutVT.getVectorNumElements(); 6364 unsigned NumInPerOut = InNumElts / OutNumElts; 6365 6366 SDValue ZeroVec = 6367 DAG.getSplatVector(InVT, DL, DAG.getConstant(0, DL, InVT.getScalarType())); 6368 6369 SmallVector<int, 16> Mask(InNumElts); 6370 unsigned ZeroVecElt = InNumElts; 6371 for (unsigned PackedElt = 0; PackedElt < OutNumElts; PackedElt++) { 6372 unsigned MaskElt = PackedElt * NumInPerOut; 6373 unsigned End = MaskElt + NumInPerOut - 1; 6374 for (; MaskElt < End; MaskElt++) 6375 Mask[MaskElt] = ZeroVecElt++; 6376 Mask[MaskElt] = PackedElt; 6377 } 6378 SDValue Shuf = DAG.getVectorShuffle(InVT, DL, PackedOp, ZeroVec, Mask); 6379 return DAG.getNode(ISD::BITCAST, DL, OutVT, Shuf); 6380 } 6381 6382 SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, 6383 unsigned ByScalar) const { 6384 // Look for cases where a vector shift can use the *_BY_SCALAR form. 6385 SDValue Op0 = Op.getOperand(0); 6386 SDValue Op1 = Op.getOperand(1); 6387 SDLoc DL(Op); 6388 EVT VT = Op.getValueType(); 6389 unsigned ElemBitSize = VT.getScalarSizeInBits(); 6390 6391 // See whether the shift vector is a splat represented as BUILD_VECTOR. 6392 if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) { 6393 APInt SplatBits, SplatUndef; 6394 unsigned SplatBitSize; 6395 bool HasAnyUndefs; 6396 // Check for constant splats. Use ElemBitSize as the minimum element 6397 // width and reject splats that need wider elements. 
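    // (For example, a <4 x i32> splat of the constant 7 yields
    // SplatBitSize == 32 and a single *_BY_SCALAR node whose shift amount
    // is the splat value masked to its low 12 bits.)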
6398     if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
6399                              ElemBitSize, true) &&
6400         SplatBitSize == ElemBitSize) {
6401       SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
6402                                       DL, MVT::i32);
6403       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6404     }
6405     // Check for variable splats.
6406     BitVector UndefElements;
6407     SDValue Splat = BVN->getSplatValue(&UndefElements);
6408     if (Splat) {
6409       // Since i32 is the smallest legal type, we either need a no-op
6410       // or a truncation.
6411       SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
6412       return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6413     }
6414   }
6415 
6416   // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
6417   // and the shift amount is directly available in a GPR.
6418   if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
6419     if (VSN->isSplat()) {
6420       SDValue VSNOp0 = VSN->getOperand(0);
6421       unsigned Index = VSN->getSplatIndex();
6422       assert(Index < VT.getVectorNumElements() &&
6423              "Splat index should be defined and in first operand");
6424       if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
6425           VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
6426         // Since i32 is the smallest legal type, we either need a no-op
6427         // or a truncation.
6428         SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
6429                                     VSNOp0.getOperand(Index));
6430         return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
6431       }
6432     }
6433   }
6434 
6435   // Otherwise just treat the current form as legal.
6436   return Op;
6437 }
6438 
6439 static SDValue lowerAddrSpaceCast(SDValue Op, SelectionDAG &DAG) {
6440   SDLoc dl(Op);
6441   SDValue Src = Op.getOperand(0);
6442   MVT DstVT = Op.getSimpleValueType();
6443 
6444   AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode());
6445   unsigned SrcAS = N->getSrcAddressSpace();
6446 
6447   assert(SrcAS != N->getDestAddressSpace() &&
6448          "addrspacecast must be between different address spaces");
6449 
6450   // addrspacecast [0 <- 1] : Assigning a ptr32 value to a 64-bit pointer.
6451   // addrspacecast [1 <- 0] : Assigning a 64-bit pointer to a ptr32 value.
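  // In both directions only the low 31 bits of the 32-bit half carry the
  // address, hence the masking with 0x7fffffff below.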
6452   if (SrcAS == SYSTEMZAS::PTR32 && DstVT == MVT::i64) {
6453     Op = DAG.getNode(ISD::AND, dl, MVT::i32, Src,
6454                      DAG.getConstant(0x7fffffff, dl, MVT::i32));
6455     Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6456   } else if (DstVT == MVT::i32) {
6457     Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src);
6458     Op = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
6459                      DAG.getConstant(0x7fffffff, dl, MVT::i32));
6460     Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op);
6461   } else {
6462     report_fatal_error("Bad address space in addrspacecast");
6463   }
6464   return Op;
6465 }
6466 
6467 SDValue SystemZTargetLowering::lowerIS_FPCLASS(SDValue Op,
6468                                                SelectionDAG &DAG) const {
6469   SDLoc DL(Op);
6470   MVT ResultVT = Op.getSimpleValueType();
6471   SDValue Arg = Op.getOperand(0);
6472   unsigned Check = Op.getConstantOperandVal(1);
6473 
6474   unsigned TDCMask = 0;
6475   if (Check & fcSNan)
6476     TDCMask |= SystemZ::TDCMASK_SNAN_PLUS | SystemZ::TDCMASK_SNAN_MINUS;
6477   if (Check & fcQNan)
6478     TDCMask |= SystemZ::TDCMASK_QNAN_PLUS | SystemZ::TDCMASK_QNAN_MINUS;
6479   if (Check & fcPosInf)
6480     TDCMask |= SystemZ::TDCMASK_INFINITY_PLUS;
6481   if (Check & fcNegInf)
6482     TDCMask |= SystemZ::TDCMASK_INFINITY_MINUS;
6483   if (Check & fcPosNormal)
6484     TDCMask |= SystemZ::TDCMASK_NORMAL_PLUS;
6485   if (Check & fcNegNormal)
6486     TDCMask |= SystemZ::TDCMASK_NORMAL_MINUS;
6487   if (Check & fcPosSubnormal)
6488     TDCMask |= SystemZ::TDCMASK_SUBNORMAL_PLUS;
6489   if (Check & fcNegSubnormal)
6490     TDCMask |= SystemZ::TDCMASK_SUBNORMAL_MINUS;
6491   if (Check & fcPosZero)
6492     TDCMask |= SystemZ::TDCMASK_ZERO_PLUS;
6493   if (Check & fcNegZero)
6494     TDCMask |= SystemZ::TDCMASK_ZERO_MINUS;
6495   SDValue TDCMaskV = DAG.getConstant(TDCMask, DL, MVT::i64);
6496 
6497   SDValue Intr = DAG.getNode(SystemZISD::TDC, DL, ResultVT, Arg, TDCMaskV);
6498   return getCCResult(DAG, Intr);
6499 }
6500 
6501 SDValue SystemZTargetLowering::lowerREADCYCLECOUNTER(SDValue Op,
6502                                                      SelectionDAG &DAG) const {
6503   SDLoc DL(Op);
6504   SDValue Chain = Op.getOperand(0);
6505 
6506   // STCKF only supports a memory operand, so we have to use a temporary.
6507   SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
6508   int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
6509   MachinePointerInfo MPI =
6510     MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
6511 
6512   // Use STCKF to store the TOD clock into the temporary.
6513   SDValue StoreOps[] = {Chain, StackPtr};
6514   Chain = DAG.getMemIntrinsicNode(
6515       SystemZISD::STCKF, DL, DAG.getVTList(MVT::Other), StoreOps, MVT::i64,
6516       MPI, MaybeAlign(), MachineMemOperand::MOStore);
6517 
6518   // And read it back from there.
6519 return DAG.getLoad(MVT::i64, DL, Chain, StackPtr, MPI); 6520 } 6521 6522 SDValue SystemZTargetLowering::LowerOperation(SDValue Op, 6523 SelectionDAG &DAG) const { 6524 switch (Op.getOpcode()) { 6525 case ISD::FRAMEADDR: 6526 return lowerFRAMEADDR(Op, DAG); 6527 case ISD::RETURNADDR: 6528 return lowerRETURNADDR(Op, DAG); 6529 case ISD::BR_CC: 6530 return lowerBR_CC(Op, DAG); 6531 case ISD::SELECT_CC: 6532 return lowerSELECT_CC(Op, DAG); 6533 case ISD::SETCC: 6534 return lowerSETCC(Op, DAG); 6535 case ISD::STRICT_FSETCC: 6536 return lowerSTRICT_FSETCC(Op, DAG, false); 6537 case ISD::STRICT_FSETCCS: 6538 return lowerSTRICT_FSETCC(Op, DAG, true); 6539 case ISD::GlobalAddress: 6540 return lowerGlobalAddress(cast<GlobalAddressSDNode>(Op), DAG); 6541 case ISD::GlobalTLSAddress: 6542 return lowerGlobalTLSAddress(cast<GlobalAddressSDNode>(Op), DAG); 6543 case ISD::BlockAddress: 6544 return lowerBlockAddress(cast<BlockAddressSDNode>(Op), DAG); 6545 case ISD::JumpTable: 6546 return lowerJumpTable(cast<JumpTableSDNode>(Op), DAG); 6547 case ISD::ConstantPool: 6548 return lowerConstantPool(cast<ConstantPoolSDNode>(Op), DAG); 6549 case ISD::BITCAST: 6550 return lowerBITCAST(Op, DAG); 6551 case ISD::VASTART: 6552 return lowerVASTART(Op, DAG); 6553 case ISD::VACOPY: 6554 return lowerVACOPY(Op, DAG); 6555 case ISD::DYNAMIC_STACKALLOC: 6556 return lowerDYNAMIC_STACKALLOC(Op, DAG); 6557 case ISD::GET_DYNAMIC_AREA_OFFSET: 6558 return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG); 6559 case ISD::MULHS: 6560 return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI); 6561 case ISD::MULHU: 6562 return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI); 6563 case ISD::SMUL_LOHI: 6564 return lowerSMUL_LOHI(Op, DAG); 6565 case ISD::UMUL_LOHI: 6566 return lowerUMUL_LOHI(Op, DAG); 6567 case ISD::SDIVREM: 6568 return lowerSDIVREM(Op, DAG); 6569 case ISD::UDIVREM: 6570 return lowerUDIVREM(Op, DAG); 6571 case ISD::SADDO: 6572 case ISD::SSUBO: 6573 case ISD::UADDO: 6574 case ISD::USUBO: 6575 return lowerXALUO(Op, DAG); 6576 case ISD::UADDO_CARRY: 6577 case ISD::USUBO_CARRY: 6578 return lowerUADDSUBO_CARRY(Op, DAG); 6579 case ISD::OR: 6580 return lowerOR(Op, DAG); 6581 case ISD::CTPOP: 6582 return lowerCTPOP(Op, DAG); 6583 case ISD::VECREDUCE_ADD: 6584 return lowerVECREDUCE_ADD(Op, DAG); 6585 case ISD::ATOMIC_FENCE: 6586 return lowerATOMIC_FENCE(Op, DAG); 6587 case ISD::ATOMIC_SWAP: 6588 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); 6589 case ISD::ATOMIC_STORE: 6590 case ISD::ATOMIC_LOAD: 6591 return lowerATOMIC_LDST_I128(Op, DAG); 6592 case ISD::ATOMIC_LOAD_ADD: 6593 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_ADD); 6594 case ISD::ATOMIC_LOAD_SUB: 6595 return lowerATOMIC_LOAD_SUB(Op, DAG); 6596 case ISD::ATOMIC_LOAD_AND: 6597 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_AND); 6598 case ISD::ATOMIC_LOAD_OR: 6599 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_OR); 6600 case ISD::ATOMIC_LOAD_XOR: 6601 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_XOR); 6602 case ISD::ATOMIC_LOAD_NAND: 6603 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_NAND); 6604 case ISD::ATOMIC_LOAD_MIN: 6605 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MIN); 6606 case ISD::ATOMIC_LOAD_MAX: 6607 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_MAX); 6608 case ISD::ATOMIC_LOAD_UMIN: 6609 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMIN); 6610 case ISD::ATOMIC_LOAD_UMAX: 6611 return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_LOADW_UMAX); 
6612 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: 6613 return lowerATOMIC_CMP_SWAP(Op, DAG); 6614 case ISD::STACKSAVE: 6615 return lowerSTACKSAVE(Op, DAG); 6616 case ISD::STACKRESTORE: 6617 return lowerSTACKRESTORE(Op, DAG); 6618 case ISD::PREFETCH: 6619 return lowerPREFETCH(Op, DAG); 6620 case ISD::INTRINSIC_W_CHAIN: 6621 return lowerINTRINSIC_W_CHAIN(Op, DAG); 6622 case ISD::INTRINSIC_WO_CHAIN: 6623 return lowerINTRINSIC_WO_CHAIN(Op, DAG); 6624 case ISD::BUILD_VECTOR: 6625 return lowerBUILD_VECTOR(Op, DAG); 6626 case ISD::VECTOR_SHUFFLE: 6627 return lowerVECTOR_SHUFFLE(Op, DAG); 6628 case ISD::SCALAR_TO_VECTOR: 6629 return lowerSCALAR_TO_VECTOR(Op, DAG); 6630 case ISD::INSERT_VECTOR_ELT: 6631 return lowerINSERT_VECTOR_ELT(Op, DAG); 6632 case ISD::EXTRACT_VECTOR_ELT: 6633 return lowerEXTRACT_VECTOR_ELT(Op, DAG); 6634 case ISD::SIGN_EXTEND_VECTOR_INREG: 6635 return lowerSIGN_EXTEND_VECTOR_INREG(Op, DAG); 6636 case ISD::ZERO_EXTEND_VECTOR_INREG: 6637 return lowerZERO_EXTEND_VECTOR_INREG(Op, DAG); 6638 case ISD::SHL: 6639 return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); 6640 case ISD::SRL: 6641 return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); 6642 case ISD::SRA: 6643 return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); 6644 case ISD::ADDRSPACECAST: 6645 return lowerAddrSpaceCast(Op, DAG); 6646 case ISD::ROTL: 6647 return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR); 6648 case ISD::IS_FPCLASS: 6649 return lowerIS_FPCLASS(Op, DAG); 6650 case ISD::GET_ROUNDING: 6651 return lowerGET_ROUNDING(Op, DAG); 6652 case ISD::READCYCLECOUNTER: 6653 return lowerREADCYCLECOUNTER(Op, DAG); 6654 case ISD::EH_SJLJ_SETJMP: 6655 case ISD::EH_SJLJ_LONGJMP: 6656 // These operations are legal on our platform, but we cannot actually 6657 // set the operation action to Legal as common code would treat this 6658 // as equivalent to Expand. Instead, we keep the operation action to 6659 // Custom and just leave them unchanged here. 6660 return Op; 6661 6662 default: 6663 llvm_unreachable("Unexpected node to lower"); 6664 } 6665 } 6666 6667 static SDValue expandBitCastI128ToF128(SelectionDAG &DAG, SDValue Src, 6668 const SDLoc &SL) { 6669 // If i128 is legal, just use a normal bitcast. 6670 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) 6671 return DAG.getBitcast(MVT::f128, Src); 6672 6673 // Otherwise, f128 must live in FP128, so do a partwise move. 6674 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == 6675 &SystemZ::FP128BitRegClass); 6676 6677 SDValue Hi, Lo; 6678 std::tie(Lo, Hi) = DAG.SplitScalar(Src, SL, MVT::i64, MVT::i64); 6679 6680 Hi = DAG.getBitcast(MVT::f64, Hi); 6681 Lo = DAG.getBitcast(MVT::f64, Lo); 6682 6683 SDNode *Pair = DAG.getMachineNode( 6684 SystemZ::REG_SEQUENCE, SL, MVT::f128, 6685 {DAG.getTargetConstant(SystemZ::FP128BitRegClassID, SL, MVT::i32), Lo, 6686 DAG.getTargetConstant(SystemZ::subreg_l64, SL, MVT::i32), Hi, 6687 DAG.getTargetConstant(SystemZ::subreg_h64, SL, MVT::i32)}); 6688 return SDValue(Pair, 0); 6689 } 6690 6691 static SDValue expandBitCastF128ToI128(SelectionDAG &DAG, SDValue Src, 6692 const SDLoc &SL) { 6693 // If i128 is legal, just use a normal bitcast. 6694 if (DAG.getTargetLoweringInfo().isTypeLegal(MVT::i128)) 6695 return DAG.getBitcast(MVT::i128, Src); 6696 6697 // Otherwise, f128 must live in FP128, so do a partwise move. 
6698 assert(DAG.getTargetLoweringInfo().getRepRegClassFor(MVT::f128) == 6699 &SystemZ::FP128BitRegClass); 6700 6701 SDValue LoFP = 6702 DAG.getTargetExtractSubreg(SystemZ::subreg_l64, SL, MVT::f64, Src); 6703 SDValue HiFP = 6704 DAG.getTargetExtractSubreg(SystemZ::subreg_h64, SL, MVT::f64, Src); 6705 SDValue Lo = DAG.getNode(ISD::BITCAST, SL, MVT::i64, LoFP); 6706 SDValue Hi = DAG.getNode(ISD::BITCAST, SL, MVT::i64, HiFP); 6707 6708 return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i128, Lo, Hi); 6709 } 6710 6711 // Lower operations with invalid operand or result types (currently used 6712 // only for 128-bit integer types). 6713 void 6714 SystemZTargetLowering::LowerOperationWrapper(SDNode *N, 6715 SmallVectorImpl<SDValue> &Results, 6716 SelectionDAG &DAG) const { 6717 switch (N->getOpcode()) { 6718 case ISD::ATOMIC_LOAD: { 6719 SDLoc DL(N); 6720 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::Other); 6721 SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; 6722 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6723 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_LOAD_128, 6724 DL, Tys, Ops, MVT::i128, MMO); 6725 6726 SDValue Lowered = lowerGR128ToI128(DAG, Res); 6727 if (N->getValueType(0) == MVT::f128) 6728 Lowered = expandBitCastI128ToF128(DAG, Lowered, DL); 6729 Results.push_back(Lowered); 6730 Results.push_back(Res.getValue(1)); 6731 break; 6732 } 6733 case ISD::ATOMIC_STORE: { 6734 SDLoc DL(N); 6735 SDVTList Tys = DAG.getVTList(MVT::Other); 6736 SDValue Val = N->getOperand(1); 6737 if (Val.getValueType() == MVT::f128) 6738 Val = expandBitCastF128ToI128(DAG, Val, DL); 6739 Val = lowerI128ToGR128(DAG, Val); 6740 6741 SDValue Ops[] = {N->getOperand(0), Val, N->getOperand(2)}; 6742 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6743 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_STORE_128, 6744 DL, Tys, Ops, MVT::i128, MMO); 6745 // We have to enforce sequential consistency by performing a 6746 // serialization operation after the store. 
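      // (On z/Architecture only sequentially consistent stores need this
      // extra barrier; weaker orderings are already honored by the store
      // itself.)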
6747 if (cast<AtomicSDNode>(N)->getSuccessOrdering() == 6748 AtomicOrdering::SequentiallyConsistent) 6749 Res = SDValue(DAG.getMachineNode(SystemZ::Serialize, DL, 6750 MVT::Other, Res), 0); 6751 Results.push_back(Res); 6752 break; 6753 } 6754 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { 6755 SDLoc DL(N); 6756 SDVTList Tys = DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other); 6757 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 6758 lowerI128ToGR128(DAG, N->getOperand(2)), 6759 lowerI128ToGR128(DAG, N->getOperand(3)) }; 6760 MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); 6761 SDValue Res = DAG.getMemIntrinsicNode(SystemZISD::ATOMIC_CMP_SWAP_128, 6762 DL, Tys, Ops, MVT::i128, MMO); 6763 SDValue Success = emitSETCC(DAG, DL, Res.getValue(1), 6764 SystemZ::CCMASK_CS, SystemZ::CCMASK_CS_EQ); 6765 Success = DAG.getZExtOrTrunc(Success, DL, N->getValueType(1)); 6766 Results.push_back(lowerGR128ToI128(DAG, Res)); 6767 Results.push_back(Success); 6768 Results.push_back(Res.getValue(2)); 6769 break; 6770 } 6771 case ISD::BITCAST: { 6772 SDValue Src = N->getOperand(0); 6773 if (N->getValueType(0) == MVT::i128 && Src.getValueType() == MVT::f128 && 6774 !useSoftFloat()) { 6775 SDLoc DL(N); 6776 Results.push_back(expandBitCastF128ToI128(DAG, Src, DL)); 6777 } 6778 break; 6779 } 6780 default: 6781 llvm_unreachable("Unexpected node to lower"); 6782 } 6783 } 6784 6785 void 6786 SystemZTargetLowering::ReplaceNodeResults(SDNode *N, 6787 SmallVectorImpl<SDValue> &Results, 6788 SelectionDAG &DAG) const { 6789 return LowerOperationWrapper(N, Results, DAG); 6790 } 6791 6792 const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { 6793 #define OPCODE(NAME) case SystemZISD::NAME: return "SystemZISD::" #NAME 6794 switch ((SystemZISD::NodeType)Opcode) { 6795 case SystemZISD::FIRST_NUMBER: break; 6796 OPCODE(RET_GLUE); 6797 OPCODE(CALL); 6798 OPCODE(SIBCALL); 6799 OPCODE(TLS_GDCALL); 6800 OPCODE(TLS_LDCALL); 6801 OPCODE(PCREL_WRAPPER); 6802 OPCODE(PCREL_OFFSET); 6803 OPCODE(ICMP); 6804 OPCODE(FCMP); 6805 OPCODE(STRICT_FCMP); 6806 OPCODE(STRICT_FCMPS); 6807 OPCODE(TM); 6808 OPCODE(BR_CCMASK); 6809 OPCODE(SELECT_CCMASK); 6810 OPCODE(ADJDYNALLOC); 6811 OPCODE(PROBED_ALLOCA); 6812 OPCODE(POPCNT); 6813 OPCODE(SMUL_LOHI); 6814 OPCODE(UMUL_LOHI); 6815 OPCODE(SDIVREM); 6816 OPCODE(UDIVREM); 6817 OPCODE(SADDO); 6818 OPCODE(SSUBO); 6819 OPCODE(UADDO); 6820 OPCODE(USUBO); 6821 OPCODE(ADDCARRY); 6822 OPCODE(SUBCARRY); 6823 OPCODE(GET_CCMASK); 6824 OPCODE(MVC); 6825 OPCODE(NC); 6826 OPCODE(OC); 6827 OPCODE(XC); 6828 OPCODE(CLC); 6829 OPCODE(MEMSET_MVC); 6830 OPCODE(STPCPY); 6831 OPCODE(STRCMP); 6832 OPCODE(SEARCH_STRING); 6833 OPCODE(IPM); 6834 OPCODE(TBEGIN); 6835 OPCODE(TBEGIN_NOFLOAT); 6836 OPCODE(TEND); 6837 OPCODE(BYTE_MASK); 6838 OPCODE(ROTATE_MASK); 6839 OPCODE(REPLICATE); 6840 OPCODE(JOIN_DWORDS); 6841 OPCODE(SPLAT); 6842 OPCODE(MERGE_HIGH); 6843 OPCODE(MERGE_LOW); 6844 OPCODE(SHL_DOUBLE); 6845 OPCODE(PERMUTE_DWORDS); 6846 OPCODE(PERMUTE); 6847 OPCODE(PACK); 6848 OPCODE(PACKS_CC); 6849 OPCODE(PACKLS_CC); 6850 OPCODE(UNPACK_HIGH); 6851 OPCODE(UNPACKL_HIGH); 6852 OPCODE(UNPACK_LOW); 6853 OPCODE(UNPACKL_LOW); 6854 OPCODE(VSHL_BY_SCALAR); 6855 OPCODE(VSRL_BY_SCALAR); 6856 OPCODE(VSRA_BY_SCALAR); 6857 OPCODE(VROTL_BY_SCALAR); 6858 OPCODE(VSUM); 6859 OPCODE(VACC); 6860 OPCODE(VSCBI); 6861 OPCODE(VAC); 6862 OPCODE(VSBI); 6863 OPCODE(VACCC); 6864 OPCODE(VSBCBI); 6865 OPCODE(VICMPE); 6866 OPCODE(VICMPH); 6867 OPCODE(VICMPHL); 6868 OPCODE(VICMPES); 6869 OPCODE(VICMPHS); 6870 OPCODE(VICMPHLS); 
6871 OPCODE(VFCMPE); 6872 OPCODE(STRICT_VFCMPE); 6873 OPCODE(STRICT_VFCMPES); 6874 OPCODE(VFCMPH); 6875 OPCODE(STRICT_VFCMPH); 6876 OPCODE(STRICT_VFCMPHS); 6877 OPCODE(VFCMPHE); 6878 OPCODE(STRICT_VFCMPHE); 6879 OPCODE(STRICT_VFCMPHES); 6880 OPCODE(VFCMPES); 6881 OPCODE(VFCMPHS); 6882 OPCODE(VFCMPHES); 6883 OPCODE(VFTCI); 6884 OPCODE(VEXTEND); 6885 OPCODE(STRICT_VEXTEND); 6886 OPCODE(VROUND); 6887 OPCODE(STRICT_VROUND); 6888 OPCODE(VTM); 6889 OPCODE(SCMP128HI); 6890 OPCODE(UCMP128HI); 6891 OPCODE(VFAE_CC); 6892 OPCODE(VFAEZ_CC); 6893 OPCODE(VFEE_CC); 6894 OPCODE(VFEEZ_CC); 6895 OPCODE(VFENE_CC); 6896 OPCODE(VFENEZ_CC); 6897 OPCODE(VISTR_CC); 6898 OPCODE(VSTRC_CC); 6899 OPCODE(VSTRCZ_CC); 6900 OPCODE(VSTRS_CC); 6901 OPCODE(VSTRSZ_CC); 6902 OPCODE(TDC); 6903 OPCODE(ATOMIC_SWAPW); 6904 OPCODE(ATOMIC_LOADW_ADD); 6905 OPCODE(ATOMIC_LOADW_SUB); 6906 OPCODE(ATOMIC_LOADW_AND); 6907 OPCODE(ATOMIC_LOADW_OR); 6908 OPCODE(ATOMIC_LOADW_XOR); 6909 OPCODE(ATOMIC_LOADW_NAND); 6910 OPCODE(ATOMIC_LOADW_MIN); 6911 OPCODE(ATOMIC_LOADW_MAX); 6912 OPCODE(ATOMIC_LOADW_UMIN); 6913 OPCODE(ATOMIC_LOADW_UMAX); 6914 OPCODE(ATOMIC_CMP_SWAPW); 6915 OPCODE(ATOMIC_CMP_SWAP); 6916 OPCODE(ATOMIC_LOAD_128); 6917 OPCODE(ATOMIC_STORE_128); 6918 OPCODE(ATOMIC_CMP_SWAP_128); 6919 OPCODE(LRV); 6920 OPCODE(STRV); 6921 OPCODE(VLER); 6922 OPCODE(VSTER); 6923 OPCODE(STCKF); 6924 OPCODE(PREFETCH); 6925 OPCODE(ADA_ENTRY); 6926 } 6927 return nullptr; 6928 #undef OPCODE 6929 } 6930 6931 // Return true if VT is a vector whose elements are a whole number of bytes 6932 // in width. Also check for presence of vector support. 6933 bool SystemZTargetLowering::canTreatAsByteVector(EVT VT) const { 6934 if (!Subtarget.hasVector()) 6935 return false; 6936 6937 return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0 && VT.isSimple(); 6938 } 6939 6940 // Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT 6941 // producing a result of type ResVT. Op is a possibly bitcast version 6942 // of the input vector and Index is the index (based on type VecVT) that 6943 // should be extracted. Return the new extraction if a simplification 6944 // was possible or if Force is true. 6945 SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT, 6946 EVT VecVT, SDValue Op, 6947 unsigned Index, 6948 DAGCombinerInfo &DCI, 6949 bool Force) const { 6950 SelectionDAG &DAG = DCI.DAG; 6951 6952 // The number of bytes being extracted. 6953 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 6954 6955 for (;;) { 6956 unsigned Opcode = Op.getOpcode(); 6957 if (Opcode == ISD::BITCAST) 6958 // Look through bitcasts. 6959 Op = Op.getOperand(0); 6960 else if ((Opcode == ISD::VECTOR_SHUFFLE || Opcode == SystemZISD::SPLAT) && 6961 canTreatAsByteVector(Op.getValueType())) { 6962 // Get a VPERM-like permute mask and see whether the bytes covered 6963 // by the extracted element are a contiguous sequence from one 6964 // source operand. 6965 SmallVector<int, SystemZ::VectorBytes> Bytes; 6966 if (!getVPermMask(Op, Bytes)) 6967 break; 6968 int First; 6969 if (!getShuffleInput(Bytes, Index * BytesPerElement, 6970 BytesPerElement, First)) 6971 break; 6972 if (First < 0) 6973 return DAG.getUNDEF(ResVT); 6974 // Make sure the contiguous sequence starts at a multiple of the 6975 // original element size. 6976 unsigned Byte = unsigned(First) % Bytes.size(); 6977 if (Byte % BytesPerElement != 0) 6978 break; 6979 // We can get the extracted value directly from an input. 
6980 Index = Byte / BytesPerElement; 6981 Op = Op.getOperand(unsigned(First) / Bytes.size()); 6982 Force = true; 6983 } else if (Opcode == ISD::BUILD_VECTOR && 6984 canTreatAsByteVector(Op.getValueType())) { 6985 // We can only optimize this case if the BUILD_VECTOR elements are 6986 // at least as wide as the extracted value. 6987 EVT OpVT = Op.getValueType(); 6988 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 6989 if (OpBytesPerElement < BytesPerElement) 6990 break; 6991 // Make sure that the least-significant bit of the extracted value 6992 // is the least significant bit of an input. 6993 unsigned End = (Index + 1) * BytesPerElement; 6994 if (End % OpBytesPerElement != 0) 6995 break; 6996 // We're extracting the low part of one operand of the BUILD_VECTOR. 6997 Op = Op.getOperand(End / OpBytesPerElement - 1); 6998 if (!Op.getValueType().isInteger()) { 6999 EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits()); 7000 Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); 7001 DCI.AddToWorklist(Op.getNode()); 7002 } 7003 EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); 7004 Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); 7005 if (VT != ResVT) { 7006 DCI.AddToWorklist(Op.getNode()); 7007 Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); 7008 } 7009 return Op; 7010 } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || 7011 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || 7012 Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && 7013 canTreatAsByteVector(Op.getValueType()) && 7014 canTreatAsByteVector(Op.getOperand(0).getValueType())) { 7015 // Make sure that only the unextended bits are significant. 7016 EVT ExtVT = Op.getValueType(); 7017 EVT OpVT = Op.getOperand(0).getValueType(); 7018 unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); 7019 unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); 7020 unsigned Byte = Index * BytesPerElement; 7021 unsigned SubByte = Byte % ExtBytesPerElement; 7022 unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; 7023 if (SubByte < MinSubByte || 7024 SubByte + BytesPerElement > ExtBytesPerElement) 7025 break; 7026 // Get the byte offset of the unextended element 7027 Byte = Byte / ExtBytesPerElement * OpBytesPerElement; 7028 // ...then add the byte offset relative to that element. 7029 Byte += SubByte - MinSubByte; 7030 if (Byte % BytesPerElement != 0) 7031 break; 7032 Op = Op.getOperand(0); 7033 Index = Byte / BytesPerElement; 7034 Force = true; 7035 } else 7036 break; 7037 } 7038 if (Force) { 7039 if (Op.getValueType() != VecVT) { 7040 Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); 7041 DCI.AddToWorklist(Op.getNode()); 7042 } 7043 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, 7044 DAG.getConstant(Index, DL, MVT::i32)); 7045 } 7046 return SDValue(); 7047 } 7048 7049 // Optimize vector operations in scalar value Op on the basis that Op 7050 // is truncated to TruncVT. 7051 SDValue SystemZTargetLowering::combineTruncateExtract( 7052 const SDLoc &DL, EVT TruncVT, SDValue Op, DAGCombinerInfo &DCI) const { 7053 // If we have (trunc (extract_vector_elt X, Y)), try to turn it into 7054 // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements 7055 // of type TruncVT. 
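  // For example (illustrative): with X : v2i64 and TruncVT = i16, each i64
  // element splits into Scale = 4 pieces, so
  //   (trunc (extract_vector_elt X, Y)) to i16
  // becomes
  //   (extract_vector_elt (bitcast X to v8i16), 4 * Y + 3),
  // the last piece being the least-significant one on this big-endian
  // target.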
7056 if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7057 TruncVT.getSizeInBits() % 8 == 0) { 7058 SDValue Vec = Op.getOperand(0); 7059 EVT VecVT = Vec.getValueType(); 7060 if (canTreatAsByteVector(VecVT)) { 7061 if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 7062 unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); 7063 unsigned TruncBytes = TruncVT.getStoreSize(); 7064 if (BytesPerElement % TruncBytes == 0) { 7065 // Calculate the value of Y' in the above description. We are 7066 // splitting the original elements into Scale equal-sized pieces 7067 // and for truncation purposes want the last (least-significant) 7068 // of these pieces for IndexN. This is easiest to do by calculating 7069 // the start index of the following element and then subtracting 1. 7070 unsigned Scale = BytesPerElement / TruncBytes; 7071 unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; 7072 7073 // Defer the creation of the bitcast from X to combineExtract, 7074 // which might be able to optimize the extraction. 7075 VecVT = EVT::getVectorVT(*DCI.DAG.getContext(), 7076 MVT::getIntegerVT(TruncBytes * 8), 7077 VecVT.getStoreSize() / TruncBytes); 7078 EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT); 7079 return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); 7080 } 7081 } 7082 } 7083 } 7084 return SDValue(); 7085 } 7086 7087 SDValue SystemZTargetLowering::combineZERO_EXTEND( 7088 SDNode *N, DAGCombinerInfo &DCI) const { 7089 // Convert (zext (select_ccmask C1, C2)) into (select_ccmask C1', C2') 7090 SelectionDAG &DAG = DCI.DAG; 7091 SDValue N0 = N->getOperand(0); 7092 EVT VT = N->getValueType(0); 7093 if (N0.getOpcode() == SystemZISD::SELECT_CCMASK) { 7094 auto *TrueOp = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 7095 auto *FalseOp = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 7096 if (TrueOp && FalseOp) { 7097 SDLoc DL(N0); 7098 SDValue Ops[] = { DAG.getConstant(TrueOp->getZExtValue(), DL, VT), 7099 DAG.getConstant(FalseOp->getZExtValue(), DL, VT), 7100 N0.getOperand(2), N0.getOperand(3), N0.getOperand(4) }; 7101 SDValue NewSelect = DAG.getNode(SystemZISD::SELECT_CCMASK, DL, VT, Ops); 7102 // If N0 has multiple uses, change other uses as well. 7103 if (!N0.hasOneUse()) { 7104 SDValue TruncSelect = 7105 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), NewSelect); 7106 DCI.CombineTo(N0.getNode(), TruncSelect); 7107 } 7108 return NewSelect; 7109 } 7110 } 7111 // Convert (zext (xor (trunc X), C)) into (xor (trunc X), C') if the size 7112 // of the result is smaller than the size of X and all the truncated bits 7113 // of X are already zero. 
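  // For example (illustrative): if X is an i128 value whose bits 32..63 are
  // known to be zero, then
  //   (zext i64 (xor (trunc X to i32), C))
  // becomes (xor (trunc X to i64), C') with C' the zero-extension of C.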
7114 if (N0.getOpcode() == ISD::XOR && 7115 N0.hasOneUse() && N0.getOperand(0).hasOneUse() && 7116 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 7117 N0.getOperand(1).getOpcode() == ISD::Constant) { 7118 SDValue X = N0.getOperand(0).getOperand(0); 7119 if (VT.isScalarInteger() && VT.getSizeInBits() < X.getValueSizeInBits()) { 7120 KnownBits Known = DAG.computeKnownBits(X); 7121 APInt TruncatedBits = APInt::getBitsSet(X.getValueSizeInBits(), 7122 N0.getValueSizeInBits(), 7123 VT.getSizeInBits()); 7124 if (TruncatedBits.isSubsetOf(Known.Zero)) { 7125 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 7126 APInt Mask = N0.getConstantOperandAPInt(1).zext(VT.getSizeInBits()); 7127 return DAG.getNode(ISD::XOR, SDLoc(N0), VT, 7128 X, DAG.getConstant(Mask, SDLoc(N0), VT)); 7129 } 7130 } 7131 } 7132 7133 return SDValue(); 7134 } 7135 7136 SDValue SystemZTargetLowering::combineSIGN_EXTEND_INREG( 7137 SDNode *N, DAGCombinerInfo &DCI) const { 7138 // Convert (sext_in_reg (setcc LHS, RHS, COND), i1) 7139 // and (sext_in_reg (any_extend (setcc LHS, RHS, COND)), i1) 7140 // into (select_cc LHS, RHS, -1, 0, COND) 7141 SelectionDAG &DAG = DCI.DAG; 7142 SDValue N0 = N->getOperand(0); 7143 EVT VT = N->getValueType(0); 7144 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 7145 if (N0.hasOneUse() && N0.getOpcode() == ISD::ANY_EXTEND) 7146 N0 = N0.getOperand(0); 7147 if (EVT == MVT::i1 && N0.hasOneUse() && N0.getOpcode() == ISD::SETCC) { 7148 SDLoc DL(N0); 7149 SDValue Ops[] = { N0.getOperand(0), N0.getOperand(1), 7150 DAG.getAllOnesConstant(DL, VT), 7151 DAG.getConstant(0, DL, VT), N0.getOperand(2) }; 7152 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops); 7153 } 7154 return SDValue(); 7155 } 7156 7157 SDValue SystemZTargetLowering::combineSIGN_EXTEND( 7158 SDNode *N, DAGCombinerInfo &DCI) const { 7159 // Convert (sext (ashr (shl X, C1), C2)) to 7160 // (ashr (shl (anyext X), C1'), C2')), since wider shifts are as 7161 // cheap as narrower ones. 7162 SelectionDAG &DAG = DCI.DAG; 7163 SDValue N0 = N->getOperand(0); 7164 EVT VT = N->getValueType(0); 7165 if (N0.hasOneUse() && N0.getOpcode() == ISD::SRA) { 7166 auto *SraAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 7167 SDValue Inner = N0.getOperand(0); 7168 if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) { 7169 if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) { 7170 unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits()); 7171 unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra; 7172 unsigned NewSraAmt = SraAmt->getZExtValue() + Extra; 7173 EVT ShiftVT = N0.getOperand(1).getValueType(); 7174 SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, SDLoc(Inner), VT, 7175 Inner.getOperand(0)); 7176 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(Inner), VT, Ext, 7177 DAG.getConstant(NewShlAmt, SDLoc(Inner), 7178 ShiftVT)); 7179 return DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, 7180 DAG.getConstant(NewSraAmt, SDLoc(N0), ShiftVT)); 7181 } 7182 } 7183 } 7184 7185 return SDValue(); 7186 } 7187 7188 SDValue SystemZTargetLowering::combineMERGE( 7189 SDNode *N, DAGCombinerInfo &DCI) const { 7190 SelectionDAG &DAG = DCI.DAG; 7191 unsigned Opcode = N->getOpcode(); 7192 SDValue Op0 = N->getOperand(0); 7193 SDValue Op1 = N->getOperand(1); 7194 if (Op0.getOpcode() == ISD::BITCAST) 7195 Op0 = Op0.getOperand(0); 7196 if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 7197 // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF 7198 // for v4f32. 7199 if (Op1 == N->getOperand(0)) 7200 return Op1; 7201 // (z_merge_? 0, X) -> (z_unpackl_? 0, X). 
7202 EVT VT = Op1.getValueType(); 7203 unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); 7204 if (ElemBytes <= 4) { 7205 Opcode = (Opcode == SystemZISD::MERGE_HIGH ? 7206 SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); 7207 EVT InVT = VT.changeVectorElementTypeToInteger(); 7208 EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), 7209 SystemZ::VectorBytes / ElemBytes / 2); 7210 if (VT != InVT) { 7211 Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); 7212 DCI.AddToWorklist(Op1.getNode()); 7213 } 7214 SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); 7215 DCI.AddToWorklist(Op.getNode()); 7216 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 7217 } 7218 } 7219 return SDValue(); 7220 } 7221 7222 static bool isI128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, 7223 SDNode *&HiPart) { 7224 LoPart = HiPart = nullptr; 7225 7226 // Scan through all users. 7227 for (SDUse &Use : LD->uses()) { 7228 // Skip the uses of the chain. 7229 if (Use.getResNo() != 0) 7230 continue; 7231 7232 // Verify every user is a TRUNCATE to i64 of the low or high half. 7233 SDNode *User = Use.getUser(); 7234 bool IsLoPart = true; 7235 if (User->getOpcode() == ISD::SRL && 7236 User->getOperand(1).getOpcode() == ISD::Constant && 7237 User->getConstantOperandVal(1) == 64 && User->hasOneUse()) { 7238 User = *User->user_begin(); 7239 IsLoPart = false; 7240 } 7241 if (User->getOpcode() != ISD::TRUNCATE || User->getValueType(0) != MVT::i64) 7242 return false; 7243 7244 if (IsLoPart) { 7245 if (LoPart) 7246 return false; 7247 LoPart = User; 7248 } else { 7249 if (HiPart) 7250 return false; 7251 HiPart = User; 7252 } 7253 } 7254 return true; 7255 } 7256 7257 static bool isF128MovedToParts(LoadSDNode *LD, SDNode *&LoPart, 7258 SDNode *&HiPart) { 7259 LoPart = HiPart = nullptr; 7260 7261 // Scan through all users. 7262 for (SDUse &Use : LD->uses()) { 7263 // Skip the uses of the chain. 7264 if (Use.getResNo() != 0) 7265 continue; 7266 7267 // Verify every user is an EXTRACT_SUBREG of the low or high half. 7268 SDNode *User = Use.getUser(); 7269 if (!User->hasOneUse() || !User->isMachineOpcode() || 7270 User->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) 7271 return false; 7272 7273 switch (User->getConstantOperandVal(1)) { 7274 case SystemZ::subreg_l64: 7275 if (LoPart) 7276 return false; 7277 LoPart = User; 7278 break; 7279 case SystemZ::subreg_h64: 7280 if (HiPart) 7281 return false; 7282 HiPart = User; 7283 break; 7284 default: 7285 return false; 7286 } 7287 } 7288 return true; 7289 } 7290 7291 SDValue SystemZTargetLowering::combineLOAD( 7292 SDNode *N, DAGCombinerInfo &DCI) const { 7293 SelectionDAG &DAG = DCI.DAG; 7294 EVT LdVT = N->getValueType(0); 7295 if (auto *LN = dyn_cast<LoadSDNode>(N)) { 7296 if (LN->getAddressSpace() == SYSTEMZAS::PTR32) { 7297 MVT PtrVT = getPointerTy(DAG.getDataLayout()); 7298 MVT LoadNodeVT = LN->getBasePtr().getSimpleValueType(); 7299 if (PtrVT != LoadNodeVT) { 7300 SDLoc DL(LN); 7301 SDValue AddrSpaceCast = DAG.getAddrSpaceCast( 7302 DL, PtrVT, LN->getBasePtr(), SYSTEMZAS::PTR32, 0); 7303 return DAG.getExtLoad(LN->getExtensionType(), DL, LN->getValueType(0), 7304 LN->getChain(), AddrSpaceCast, LN->getMemoryVT(), 7305 LN->getMemOperand()); 7306 } 7307 } 7308 } 7309 SDLoc DL(N); 7310 7311 // Replace a 128-bit load that is used solely to move its value into GPRs 7312 // by separate loads of both halves. 
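  // An i128 load qualifies when its only non-chain users are a TRUNCATE to
  // i64 of the value and a TRUNCATE to i64 of (srl value, 64); an f128 load
  // qualifies when its only users extract subreg_h64 / subreg_l64 (see
  // isI128MovedToParts and isF128MovedToParts above).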
7313 LoadSDNode *LD = cast<LoadSDNode>(N); 7314 if (LD->isSimple() && ISD::isNormalLoad(LD)) { 7315 SDNode *LoPart, *HiPart; 7316 if ((LdVT == MVT::i128 && isI128MovedToParts(LD, LoPart, HiPart)) || 7317 (LdVT == MVT::f128 && isF128MovedToParts(LD, LoPart, HiPart))) { 7318 // Rewrite each extraction as an independent load. 7319 SmallVector<SDValue, 2> ArgChains; 7320 if (HiPart) { 7321 SDValue EltLoad = DAG.getLoad( 7322 HiPart->getValueType(0), DL, LD->getChain(), LD->getBasePtr(), 7323 LD->getPointerInfo(), LD->getOriginalAlign(), 7324 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 7325 7326 DCI.CombineTo(HiPart, EltLoad, true); 7327 ArgChains.push_back(EltLoad.getValue(1)); 7328 } 7329 if (LoPart) { 7330 SDValue EltLoad = DAG.getLoad( 7331 LoPart->getValueType(0), DL, LD->getChain(), 7332 DAG.getObjectPtrOffset(DL, LD->getBasePtr(), TypeSize::getFixed(8)), 7333 LD->getPointerInfo().getWithOffset(8), LD->getOriginalAlign(), 7334 LD->getMemOperand()->getFlags(), LD->getAAInfo()); 7335 7336 DCI.CombineTo(LoPart, EltLoad, true); 7337 ArgChains.push_back(EltLoad.getValue(1)); 7338 } 7339 7340 // Collect all chains via TokenFactor. 7341 SDValue Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, ArgChains); 7342 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 7343 DCI.AddToWorklist(Chain.getNode()); 7344 return SDValue(N, 0); 7345 } 7346 } 7347 7348 if (LdVT.isVector() || LdVT.isInteger()) 7349 return SDValue(); 7350 // Transform a scalar load that is REPLICATEd as well as having other 7351 // use(s) to the form where the other use(s) use the first element of the 7352 // REPLICATE instead of the load. Otherwise instruction selection will not 7353 // produce a VLREP. Avoid extracting to a GPR, so only do this for floating 7354 // point loads. 7355 7356 SDValue Replicate; 7357 SmallVector<SDNode*, 8> OtherUses; 7358 for (SDUse &Use : N->uses()) { 7359 if (Use.getUser()->getOpcode() == SystemZISD::REPLICATE) { 7360 if (Replicate) 7361 return SDValue(); // Should never happen 7362 Replicate = SDValue(Use.getUser(), 0); 7363 } else if (Use.getResNo() == 0) 7364 OtherUses.push_back(Use.getUser()); 7365 } 7366 if (!Replicate || OtherUses.empty()) 7367 return SDValue(); 7368 7369 SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT, 7370 Replicate, DAG.getConstant(0, DL, MVT::i32)); 7371 // Update uses of the loaded Value while preserving old chains. 7372 for (SDNode *U : OtherUses) { 7373 SmallVector<SDValue, 8> Ops; 7374 for (SDValue Op : U->ops()) 7375 Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? 
Extract0 : Op); 7376 DAG.UpdateNodeOperands(U, Ops); 7377 } 7378 return SDValue(N, 0); 7379 } 7380 7381 bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const { 7382 if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) 7383 return true; 7384 if (Subtarget.hasVectorEnhancements2()) 7385 if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64 || VT == MVT::i128) 7386 return true; 7387 return false; 7388 } 7389 7390 static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) { 7391 if (!VT.isVector() || !VT.isSimple() || 7392 VT.getSizeInBits() != 128 || 7393 VT.getScalarSizeInBits() % 8 != 0) 7394 return false; 7395 7396 unsigned NumElts = VT.getVectorNumElements(); 7397 for (unsigned i = 0; i < NumElts; ++i) { 7398 if (M[i] < 0) continue; // ignore UNDEF indices 7399 if ((unsigned) M[i] != NumElts - 1 - i) 7400 return false; 7401 } 7402 7403 return true; 7404 } 7405 7406 static bool isOnlyUsedByStores(SDValue StoredVal, SelectionDAG &DAG) { 7407 for (auto *U : StoredVal->users()) { 7408 if (StoreSDNode *ST = dyn_cast<StoreSDNode>(U)) { 7409 EVT CurrMemVT = ST->getMemoryVT().getScalarType(); 7410 if (CurrMemVT.isRound() && CurrMemVT.getStoreSize() <= 16) 7411 continue; 7412 } else if (isa<BuildVectorSDNode>(U)) { 7413 SDValue BuildVector = SDValue(U, 0); 7414 if (DAG.isSplatValue(BuildVector, true/*AllowUndefs*/) && 7415 isOnlyUsedByStores(BuildVector, DAG)) 7416 continue; 7417 } 7418 return false; 7419 } 7420 return true; 7421 } 7422 7423 static bool isI128MovedFromParts(SDValue Val, SDValue &LoPart, 7424 SDValue &HiPart) { 7425 if (Val.getOpcode() != ISD::OR || !Val.getNode()->hasOneUse()) 7426 return false; 7427 7428 SDValue Op0 = Val.getOperand(0); 7429 SDValue Op1 = Val.getOperand(1); 7430 7431 if (Op0.getOpcode() == ISD::SHL) 7432 std::swap(Op0, Op1); 7433 if (Op1.getOpcode() != ISD::SHL || !Op1.getNode()->hasOneUse() || 7434 Op1.getOperand(1).getOpcode() != ISD::Constant || 7435 Op1.getConstantOperandVal(1) != 64) 7436 return false; 7437 Op1 = Op1.getOperand(0); 7438 7439 if (Op0.getOpcode() != ISD::ZERO_EXTEND || !Op0.getNode()->hasOneUse() || 7440 Op0.getOperand(0).getValueType() != MVT::i64) 7441 return false; 7442 if (Op1.getOpcode() != ISD::ANY_EXTEND || !Op1.getNode()->hasOneUse() || 7443 Op1.getOperand(0).getValueType() != MVT::i64) 7444 return false; 7445 7446 LoPart = Op0.getOperand(0); 7447 HiPart = Op1.getOperand(0); 7448 return true; 7449 } 7450 7451 static bool isF128MovedFromParts(SDValue Val, SDValue &LoPart, 7452 SDValue &HiPart) { 7453 if (!Val.getNode()->hasOneUse() || !Val.isMachineOpcode() || 7454 Val.getMachineOpcode() != TargetOpcode::REG_SEQUENCE) 7455 return false; 7456 7457 if (Val->getNumOperands() != 5 || 7458 Val->getOperand(0)->getAsZExtVal() != SystemZ::FP128BitRegClassID || 7459 Val->getOperand(2)->getAsZExtVal() != SystemZ::subreg_l64 || 7460 Val->getOperand(4)->getAsZExtVal() != SystemZ::subreg_h64) 7461 return false; 7462 7463 LoPart = Val->getOperand(1); 7464 HiPart = Val->getOperand(3); 7465 return true; 7466 } 7467 7468 SDValue SystemZTargetLowering::combineSTORE( 7469 SDNode *N, DAGCombinerInfo &DCI) const { 7470 SelectionDAG &DAG = DCI.DAG; 7471 auto *SN = cast<StoreSDNode>(N); 7472 auto &Op1 = N->getOperand(1); 7473 EVT MemVT = SN->getMemoryVT(); 7474 7475 if (SN->getAddressSpace() == SYSTEMZAS::PTR32) { 7476 MVT PtrVT = getPointerTy(DAG.getDataLayout()); 7477 MVT StoreNodeVT = SN->getBasePtr().getSimpleValueType(); 7478 if (PtrVT != StoreNodeVT) { 7479 SDLoc DL(SN); 7480 SDValue AddrSpaceCast = DAG.getAddrSpaceCast(DL, PtrVT, 
SN->getBasePtr(), 7481 SYSTEMZAS::PTR32, 0); 7482 return DAG.getStore(SN->getChain(), DL, SN->getValue(), AddrSpaceCast, 7483 SN->getPointerInfo(), SN->getOriginalAlign(), 7484 SN->getMemOperand()->getFlags(), SN->getAAInfo()); 7485 } 7486 } 7487 7488 // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better 7489 // for the extraction to be done on a vMiN value, so that we can use VSTE. 7490 // If X has wider elements then convert it to: 7491 // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). 7492 if (MemVT.isInteger() && SN->isTruncatingStore()) { 7493 if (SDValue Value = 7494 combineTruncateExtract(SDLoc(N), MemVT, SN->getValue(), DCI)) { 7495 DCI.AddToWorklist(Value.getNode()); 7496 7497 // Rewrite the store with the new form of stored value. 7498 return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, 7499 SN->getBasePtr(), SN->getMemoryVT(), 7500 SN->getMemOperand()); 7501 } 7502 } 7503 // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR 7504 if (!SN->isTruncatingStore() && 7505 Op1.getOpcode() == ISD::BSWAP && 7506 Op1.getNode()->hasOneUse() && 7507 canLoadStoreByteSwapped(Op1.getValueType())) { 7508 7509 SDValue BSwapOp = Op1.getOperand(0); 7510 7511 if (BSwapOp.getValueType() == MVT::i16) 7512 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), MVT::i32, BSwapOp); 7513 7514 SDValue Ops[] = { 7515 N->getOperand(0), BSwapOp, N->getOperand(2) 7516 }; 7517 7518 return 7519 DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other), 7520 Ops, MemVT, SN->getMemOperand()); 7521 } 7522 // Combine STORE (element-swap) into VSTER 7523 if (!SN->isTruncatingStore() && 7524 Op1.getOpcode() == ISD::VECTOR_SHUFFLE && 7525 Op1.getNode()->hasOneUse() && 7526 Subtarget.hasVectorEnhancements2()) { 7527 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode()); 7528 ArrayRef<int> ShuffleMask = SVN->getMask(); 7529 if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) { 7530 SDValue Ops[] = { 7531 N->getOperand(0), Op1.getOperand(0), N->getOperand(2) 7532 }; 7533 7534 return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N), 7535 DAG.getVTList(MVT::Other), 7536 Ops, MemVT, SN->getMemOperand()); 7537 } 7538 } 7539 7540 // Combine STORE (READCYCLECOUNTER) into STCKF. 7541 if (!SN->isTruncatingStore() && 7542 Op1.getOpcode() == ISD::READCYCLECOUNTER && 7543 Op1.hasOneUse() && 7544 N->getOperand(0).reachesChainWithoutSideEffects(SDValue(Op1.getNode(), 1))) { 7545 SDValue Ops[] = { Op1.getOperand(0), N->getOperand(2) }; 7546 return DAG.getMemIntrinsicNode(SystemZISD::STCKF, SDLoc(N), 7547 DAG.getVTList(MVT::Other), 7548 Ops, MemVT, SN->getMemOperand()); 7549 } 7550 7551 // Transform a store of a 128-bit value moved from parts into two stores. 
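  // This is the store-side counterpart of the load splitting in combineLOAD:
  // the high 64 bits go to offset 0 and the low 64 bits to offset 8,
  // matching the big-endian in-memory layout of a 128-bit value.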
7552   if (SN->isSimple() && ISD::isNormalStore(SN)) {
7553     SDValue LoPart, HiPart;
7554     if ((MemVT == MVT::i128 && isI128MovedFromParts(Op1, LoPart, HiPart)) ||
7555         (MemVT == MVT::f128 && isF128MovedFromParts(Op1, LoPart, HiPart))) {
7556       SDLoc DL(SN);
7557       SDValue Chain0 =
7558         DAG.getStore(SN->getChain(), DL, HiPart, SN->getBasePtr(),
7559                      SN->getPointerInfo(), SN->getOriginalAlign(),
7560                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
7561       SDValue Chain1 =
7562         DAG.getStore(SN->getChain(), DL, LoPart,
7563                      DAG.getObjectPtrOffset(DL, SN->getBasePtr(),
7564                                             TypeSize::getFixed(8)),
7565                      SN->getPointerInfo().getWithOffset(8),
7566                      SN->getOriginalAlign(),
7567                      SN->getMemOperand()->getFlags(), SN->getAAInfo());
7568 
7569       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chain0, Chain1);
7570     }
7571   }
7572 
7573   // Replicate a reg or immediate with VREP instead of scalar multiply or
7574   // immediate load. It seems best to do this during the first DAGCombine,
7575   // since it is straightforward to handle the zero-extend node in the
7576   // initial DAG and there is then no need to worry about keeping the new
7577   // MemVT legal (e.g. when extracting an i16 element from a v16i8 vector).
7578   if (Subtarget.hasVector() && DCI.Level == BeforeLegalizeTypes &&
7579       isOnlyUsedByStores(Op1, DAG)) {
7580     SDValue Word = SDValue();
7581     EVT WordVT;
7582 
7583     // Find a replicated immediate and return it if found in Word and its
7584     // type in WordVT.
7585     auto FindReplicatedImm = [&](ConstantSDNode *C, unsigned TotBytes) {
7586       // Some constants are better handled with a scalar store.
7587       if (C->getAPIntValue().getBitWidth() > 64 || C->isAllOnes() ||
7588           isInt<16>(C->getSExtValue()) || MemVT.getStoreSize() <= 2)
7589         return;
7590 
7591       APInt Val = C->getAPIntValue();
7592       // Truncate Val in case of a truncating store.
7593       if (!llvm::isUIntN(TotBytes * 8, Val.getZExtValue())) {
7594         assert(SN->isTruncatingStore() &&
7595                "Non-truncating store and immediate value does not fit?");
7596         Val = Val.trunc(TotBytes * 8);
7597       }
7598 
7599       SystemZVectorConstantInfo VCI(APInt(TotBytes * 8, Val.getZExtValue()));
7600       if (VCI.isVectorConstantLegal(Subtarget) &&
7601           VCI.Opcode == SystemZISD::REPLICATE) {
7602         Word = DAG.getConstant(VCI.OpVals[0], SDLoc(SN), MVT::i32);
7603         WordVT = VCI.VecVT.getScalarType();
7604       }
7605     };
7606 
7607     // Find a replicated register and return it if found in Word and its type
7608     // in WordVT.
7609     auto FindReplicatedReg = [&](SDValue MulOp) {
7610       EVT MulVT = MulOp.getValueType();
7611       if (MulOp->getOpcode() == ISD::MUL &&
7612           (MulVT == MVT::i16 || MulVT == MVT::i32 || MulVT == MVT::i64)) {
7613         // Find a zero extended value and its type.
7614         SDValue LHS = MulOp->getOperand(0);
7615         if (LHS->getOpcode() == ISD::ZERO_EXTEND)
7616           WordVT = LHS->getOperand(0).getValueType();
7617         else if (LHS->getOpcode() == ISD::AssertZext)
7618           WordVT = cast<VTSDNode>(LHS->getOperand(1))->getVT();
7619         else
7620           return;
7621         // Find a replicating constant, e.g. 0x00010001.
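        // (Illustrative case: storing (mul (zext i8 X), 0x01010101) as an
        // i32 replicates the byte X across the word, so the scalar multiply
        // can be replaced by a byte-replicated vector value built from X.)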
7622 if (auto *C = dyn_cast<ConstantSDNode>(MulOp->getOperand(1))) { 7623 SystemZVectorConstantInfo VCI( 7624 APInt(MulVT.getSizeInBits(), C->getZExtValue())); 7625 if (VCI.isVectorConstantLegal(Subtarget) && 7626 VCI.Opcode == SystemZISD::REPLICATE && VCI.OpVals[0] == 1 && 7627 WordVT == VCI.VecVT.getScalarType()) 7628 Word = DAG.getZExtOrTrunc(LHS->getOperand(0), SDLoc(SN), WordVT); 7629 } 7630 } 7631 }; 7632 7633 if (isa<BuildVectorSDNode>(Op1) && 7634 DAG.isSplatValue(Op1, true/*AllowUndefs*/)) { 7635 SDValue SplatVal = Op1->getOperand(0); 7636 if (auto *C = dyn_cast<ConstantSDNode>(SplatVal)) 7637 FindReplicatedImm(C, SplatVal.getValueType().getStoreSize()); 7638 else 7639 FindReplicatedReg(SplatVal); 7640 } else { 7641 if (auto *C = dyn_cast<ConstantSDNode>(Op1)) 7642 FindReplicatedImm(C, MemVT.getStoreSize()); 7643 else 7644 FindReplicatedReg(Op1); 7645 } 7646 7647 if (Word != SDValue()) { 7648 assert(MemVT.getSizeInBits() % WordVT.getSizeInBits() == 0 && 7649 "Bad type handling"); 7650 unsigned NumElts = MemVT.getSizeInBits() / WordVT.getSizeInBits(); 7651 EVT SplatVT = EVT::getVectorVT(*DAG.getContext(), WordVT, NumElts); 7652 SDValue SplatVal = DAG.getSplatVector(SplatVT, SDLoc(SN), Word); 7653 return DAG.getStore(SN->getChain(), SDLoc(SN), SplatVal, 7654 SN->getBasePtr(), SN->getMemOperand()); 7655 } 7656 } 7657 7658 return SDValue(); 7659 } 7660 7661 SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE( 7662 SDNode *N, DAGCombinerInfo &DCI) const { 7663 SelectionDAG &DAG = DCI.DAG; 7664 // Combine element-swap (LOAD) into VLER 7665 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7666 N->getOperand(0).hasOneUse() && 7667 Subtarget.hasVectorEnhancements2()) { 7668 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 7669 ArrayRef<int> ShuffleMask = SVN->getMask(); 7670 if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) { 7671 SDValue Load = N->getOperand(0); 7672 LoadSDNode *LD = cast<LoadSDNode>(Load); 7673 7674 // Create the element-swapping load. 7675 SDValue Ops[] = { 7676 LD->getChain(), // Chain 7677 LD->getBasePtr() // Ptr 7678 }; 7679 SDValue ESLoad = 7680 DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N), 7681 DAG.getVTList(LD->getValueType(0), MVT::Other), 7682 Ops, LD->getMemoryVT(), LD->getMemOperand()); 7683 7684 // First, combine the VECTOR_SHUFFLE away. This makes the value produced 7685 // by the load dead. 7686 DCI.CombineTo(N, ESLoad); 7687 7688 // Next, combine the load away, we give it a bogus result value but a real 7689 // chain result. The result value is dead because the shuffle is dead. 7690 DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1)); 7691 7692 // Return N so it doesn't get rechecked! 7693 return SDValue(N, 0); 7694 } 7695 } 7696 7697 return SDValue(); 7698 } 7699 7700 SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT( 7701 SDNode *N, DAGCombinerInfo &DCI) const { 7702 SelectionDAG &DAG = DCI.DAG; 7703 7704 if (!Subtarget.hasVector()) 7705 return SDValue(); 7706 7707 // Look through bitcasts that retain the number of vector elements. 7708 SDValue Op = N->getOperand(0); 7709 if (Op.getOpcode() == ISD::BITCAST && 7710 Op.getValueType().isVector() && 7711 Op.getOperand(0).getValueType().isVector() && 7712 Op.getValueType().getVectorNumElements() == 7713 Op.getOperand(0).getValueType().getVectorNumElements()) 7714 Op = Op.getOperand(0); 7715 7716 // Pull BSWAP out of a vector extraction. 
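  // (extract_vector_elt (bswap X), Idx) is rewritten below as a scalar
  // BSWAP of (extract_vector_elt X, Idx), with a bitcast added if the
  // element type differs from the extraction's result type.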
7717 if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) { 7718 EVT VecVT = Op.getValueType(); 7719 EVT EltVT = VecVT.getVectorElementType(); 7720 Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT, 7721 Op.getOperand(0), N->getOperand(1)); 7722 DCI.AddToWorklist(Op.getNode()); 7723 Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op); 7724 if (EltVT != N->getValueType(0)) { 7725 DCI.AddToWorklist(Op.getNode()); 7726 Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op); 7727 } 7728 return Op; 7729 } 7730 7731 // Try to simplify a vector extraction. 7732 if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 7733 SDValue Op0 = N->getOperand(0); 7734 EVT VecVT = Op0.getValueType(); 7735 if (canTreatAsByteVector(VecVT)) 7736 return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, 7737 IndexN->getZExtValue(), DCI, false); 7738 } 7739 return SDValue(); 7740 } 7741 7742 SDValue SystemZTargetLowering::combineJOIN_DWORDS( 7743 SDNode *N, DAGCombinerInfo &DCI) const { 7744 SelectionDAG &DAG = DCI.DAG; 7745 // (join_dwords X, X) == (replicate X) 7746 if (N->getOperand(0) == N->getOperand(1)) 7747 return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), 7748 N->getOperand(0)); 7749 return SDValue(); 7750 } 7751 7752 static SDValue MergeInputChains(SDNode *N1, SDNode *N2) { 7753 SDValue Chain1 = N1->getOperand(0); 7754 SDValue Chain2 = N2->getOperand(0); 7755 7756 // Trivial case: both nodes take the same chain. 7757 if (Chain1 == Chain2) 7758 return Chain1; 7759 7760 // FIXME - we could handle more complex cases via TokenFactor, 7761 // assuming we can verify that this would not create a cycle. 7762 return SDValue(); 7763 } 7764 7765 SDValue SystemZTargetLowering::combineFP_ROUND( 7766 SDNode *N, DAGCombinerInfo &DCI) const { 7767 7768 if (!Subtarget.hasVector()) 7769 return SDValue(); 7770 7771 // (fpround (extract_vector_elt X 0)) 7772 // (fpround (extract_vector_elt X 1)) -> 7773 // (extract_vector_elt (VROUND X) 0) 7774 // (extract_vector_elt (VROUND X) 2) 7775 // 7776 // This is a special case since the target doesn't really support v2f32s. 7777 unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; 7778 SelectionDAG &DAG = DCI.DAG; 7779 SDValue Op0 = N->getOperand(OpNo); 7780 if (N->getValueType(0) == MVT::f32 && Op0.hasOneUse() && 7781 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7782 Op0.getOperand(0).getValueType() == MVT::v2f64 && 7783 Op0.getOperand(1).getOpcode() == ISD::Constant && 7784 Op0.getConstantOperandVal(1) == 0) { 7785 SDValue Vec = Op0.getOperand(0); 7786 for (auto *U : Vec->users()) { 7787 if (U != Op0.getNode() && U->hasOneUse() && 7788 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7789 U->getOperand(0) == Vec && 7790 U->getOperand(1).getOpcode() == ISD::Constant && 7791 U->getConstantOperandVal(1) == 1) { 7792 SDValue OtherRound = SDValue(*U->user_begin(), 0); 7793 if (OtherRound.getOpcode() == N->getOpcode() && 7794 OtherRound.getOperand(OpNo) == SDValue(U, 0) && 7795 OtherRound.getValueType() == MVT::f32) { 7796 SDValue VRound, Chain; 7797 if (N->isStrictFPOpcode()) { 7798 Chain = MergeInputChains(N, OtherRound.getNode()); 7799 if (!Chain) 7800 continue; 7801 VRound = DAG.getNode(SystemZISD::STRICT_VROUND, SDLoc(N), 7802 {MVT::v4f32, MVT::Other}, {Chain, Vec}); 7803 Chain = VRound.getValue(1); 7804 } else 7805 VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), 7806 MVT::v4f32, Vec); 7807 DCI.AddToWorklist(VRound.getNode()); 7808 SDValue Extract1 = 7809 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, 7810 VRound, DAG.getConstant(2, SDLoc(U), MVT::i32)); 7811 DCI.AddToWorklist(Extract1.getNode()); 7812 DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); 7813 if (Chain) 7814 DAG.ReplaceAllUsesOfValueWith(OtherRound.getValue(1), Chain); 7815 SDValue Extract0 = 7816 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, 7817 VRound, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); 7818 if (Chain) 7819 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), 7820 N->getVTList(), Extract0, Chain); 7821 return Extract0; 7822 } 7823 } 7824 } 7825 } 7826 return SDValue(); 7827 } 7828 7829 SDValue SystemZTargetLowering::combineFP_EXTEND( 7830 SDNode *N, DAGCombinerInfo &DCI) const { 7831 7832 if (!Subtarget.hasVector()) 7833 return SDValue(); 7834 7835 // (fpextend (extract_vector_elt X 0)) 7836 // (fpextend (extract_vector_elt X 2)) -> 7837 // (extract_vector_elt (VEXTEND X) 0) 7838 // (extract_vector_elt (VEXTEND X) 1) 7839 // 7840 // This is a special case since the target doesn't really support v2f32s. 7841 unsigned OpNo = N->isStrictFPOpcode() ? 
1 : 0; 7842 SelectionDAG &DAG = DCI.DAG; 7843 SDValue Op0 = N->getOperand(OpNo); 7844 if (N->getValueType(0) == MVT::f64 && Op0.hasOneUse() && 7845 Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7846 Op0.getOperand(0).getValueType() == MVT::v4f32 && 7847 Op0.getOperand(1).getOpcode() == ISD::Constant && 7848 Op0.getConstantOperandVal(1) == 0) { 7849 SDValue Vec = Op0.getOperand(0); 7850 for (auto *U : Vec->users()) { 7851 if (U != Op0.getNode() && U->hasOneUse() && 7852 U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 7853 U->getOperand(0) == Vec && 7854 U->getOperand(1).getOpcode() == ISD::Constant && 7855 U->getConstantOperandVal(1) == 2) { 7856 SDValue OtherExtend = SDValue(*U->user_begin(), 0); 7857 if (OtherExtend.getOpcode() == N->getOpcode() && 7858 OtherExtend.getOperand(OpNo) == SDValue(U, 0) && 7859 OtherExtend.getValueType() == MVT::f64) { 7860 SDValue VExtend, Chain; 7861 if (N->isStrictFPOpcode()) { 7862 Chain = MergeInputChains(N, OtherExtend.getNode()); 7863 if (!Chain) 7864 continue; 7865 VExtend = DAG.getNode(SystemZISD::STRICT_VEXTEND, SDLoc(N), 7866 {MVT::v2f64, MVT::Other}, {Chain, Vec}); 7867 Chain = VExtend.getValue(1); 7868 } else 7869 VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N), 7870 MVT::v2f64, Vec); 7871 DCI.AddToWorklist(VExtend.getNode()); 7872 SDValue Extract1 = 7873 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64, 7874 VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32)); 7875 DCI.AddToWorklist(Extract1.getNode()); 7876 DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1); 7877 if (Chain) 7878 DAG.ReplaceAllUsesOfValueWith(OtherExtend.getValue(1), Chain); 7879 SDValue Extract0 = 7880 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64, 7881 VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32)); 7882 if (Chain) 7883 return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op0), 7884 N->getVTList(), Extract0, Chain); 7885 return Extract0; 7886 } 7887 } 7888 } 7889 } 7890 return SDValue(); 7891 } 7892 7893 SDValue SystemZTargetLowering::combineINT_TO_FP( 7894 SDNode *N, DAGCombinerInfo &DCI) const { 7895 if (DCI.Level != BeforeLegalizeTypes) 7896 return SDValue(); 7897 SelectionDAG &DAG = DCI.DAG; 7898 LLVMContext &Ctx = *DAG.getContext(); 7899 unsigned Opcode = N->getOpcode(); 7900 EVT OutVT = N->getValueType(0); 7901 Type *OutLLVMTy = OutVT.getTypeForEVT(Ctx); 7902 SDValue Op = N->getOperand(0); 7903 unsigned OutScalarBits = OutLLVMTy->getScalarSizeInBits(); 7904 unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits(); 7905 7906 // Insert an extension before type-legalization to avoid scalarization, e.g.: 7907 // v2f64 = uint_to_fp v2i16 7908 // => 7909 // v2f64 = uint_to_fp (v2i64 zero_extend v2i16) 7910 if (OutLLVMTy->isVectorTy() && OutScalarBits > InScalarBits && 7911 OutScalarBits <= 64) { 7912 unsigned NumElts = cast<FixedVectorType>(OutLLVMTy)->getNumElements(); 7913 EVT ExtVT = EVT::getVectorVT( 7914 Ctx, EVT::getIntegerVT(Ctx, OutLLVMTy->getScalarSizeInBits()), NumElts); 7915 unsigned ExtOpcode = 7916 (Opcode == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : ISD::SIGN_EXTEND); 7917 SDValue ExtOp = DAG.getNode(ExtOpcode, SDLoc(N), ExtVT, Op); 7918 return DAG.getNode(Opcode, SDLoc(N), OutVT, ExtOp); 7919 } 7920 return SDValue(); 7921 } 7922 7923 SDValue SystemZTargetLowering::combineBSWAP( 7924 SDNode *N, DAGCombinerInfo &DCI) const { 7925 SelectionDAG &DAG = DCI.DAG; 7926 // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR 7927 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7928 N->getOperand(0).hasOneUse() && 7929 canLoadStoreByteSwapped(N->getValueType(0))) { 7930 SDValue Load = N->getOperand(0); 7931 LoadSDNode *LD = cast<LoadSDNode>(Load); 7932 7933 // Create the byte-swapping load. 7934 SDValue Ops[] = { 7935 LD->getChain(), // Chain 7936 LD->getBasePtr() // Ptr 7937 }; 7938 EVT LoadVT = N->getValueType(0); 7939 if (LoadVT == MVT::i16) 7940 LoadVT = MVT::i32; 7941 SDValue BSLoad = 7942 DAG.getMemIntrinsicNode(SystemZISD::LRV, SDLoc(N), 7943 DAG.getVTList(LoadVT, MVT::Other), 7944 Ops, LD->getMemoryVT(), LD->getMemOperand()); 7945 7946 // If this is an i16 load, insert the truncate. 7947 SDValue ResVal = BSLoad; 7948 if (N->getValueType(0) == MVT::i16) 7949 ResVal = DAG.getNode(ISD::TRUNCATE, SDLoc(N), MVT::i16, BSLoad); 7950 7951 // First, combine the bswap away. This makes the value produced by the 7952 // load dead. 7953 DCI.CombineTo(N, ResVal); 7954 7955 // Next, combine the load away, we give it a bogus result value but a real 7956 // chain result. The result value is dead because the bswap is dead. 7957 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 7958 7959 // Return N so it doesn't get rechecked! 7960 return SDValue(N, 0); 7961 } 7962 7963 // Look through bitcasts that retain the number of vector elements. 7964 SDValue Op = N->getOperand(0); 7965 if (Op.getOpcode() == ISD::BITCAST && 7966 Op.getValueType().isVector() && 7967 Op.getOperand(0).getValueType().isVector() && 7968 Op.getValueType().getVectorNumElements() == 7969 Op.getOperand(0).getValueType().getVectorNumElements()) 7970 Op = Op.getOperand(0); 7971 7972 // Push BSWAP into a vector insertion if at least one side then simplifies. 7973 if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) { 7974 SDValue Vec = Op.getOperand(0); 7975 SDValue Elt = Op.getOperand(1); 7976 SDValue Idx = Op.getOperand(2); 7977 7978 if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) || 7979 Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() || 7980 DAG.isConstantIntBuildVectorOrConstantInt(Elt) || 7981 Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() || 7982 (canLoadStoreByteSwapped(N->getValueType(0)) && 7983 ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) { 7984 EVT VecVT = N->getValueType(0); 7985 EVT EltVT = N->getValueType(0).getVectorElementType(); 7986 if (VecVT != Vec.getValueType()) { 7987 Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec); 7988 DCI.AddToWorklist(Vec.getNode()); 7989 } 7990 if (EltVT != Elt.getValueType()) { 7991 Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt); 7992 DCI.AddToWorklist(Elt.getNode()); 7993 } 7994 Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec); 7995 DCI.AddToWorklist(Vec.getNode()); 7996 Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt); 7997 DCI.AddToWorklist(Elt.getNode()); 7998 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT, 7999 Vec, Elt, Idx); 8000 } 8001 } 8002 8003 // Push BSWAP into a vector shuffle if at least one side then simplifies. 
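  // As in the INSERT_VECTOR_ELT case above, (bswap (shuffle X, Y)) becomes
  // (shuffle (bswap X), (bswap Y)) when an operand is constant, undef or
  // itself a BSWAP, so that at least one of the new BSWAPs folds away.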
8004   ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
8005   if (SV && Op.hasOneUse()) {
8006     SDValue Op0 = Op.getOperand(0);
8007     SDValue Op1 = Op.getOperand(1);
8008 
8009     if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
8010         Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
8011         DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
8012         Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
8013       EVT VecVT = N->getValueType(0);
8014       if (VecVT != Op0.getValueType()) {
8015         Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
8016         DCI.AddToWorklist(Op0.getNode());
8017       }
8018       if (VecVT != Op1.getValueType()) {
8019         Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
8020         DCI.AddToWorklist(Op1.getNode());
8021       }
8022       Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
8023       DCI.AddToWorklist(Op0.getNode());
8024       Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
8025       DCI.AddToWorklist(Op1.getNode());
8026       return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
8027     }
8028   }
8029 
8030   return SDValue();
8031 }
8032 
8033 static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
8034   // We have a SELECT_CCMASK or BR_CCMASK comparing the condition code
8035   // set by the CCReg instruction using the CCValid / CCMask masks.
8036   // If the CCReg instruction is itself an ICMP testing the condition
8037   // code set by some other instruction, see whether we can directly
8038   // use that condition code.
8039 
8040   // Verify that we have an ICMP against some constant.
8041   if (CCValid != SystemZ::CCMASK_ICMP)
8042     return false;
8043   auto *ICmp = CCReg.getNode();
8044   if (ICmp->getOpcode() != SystemZISD::ICMP)
8045     return false;
8046   auto *CompareLHS = ICmp->getOperand(0).getNode();
8047   auto *CompareRHS = dyn_cast<ConstantSDNode>(ICmp->getOperand(1));
8048   if (!CompareRHS)
8049     return false;
8050 
8051   // Optimize the case where CompareLHS is a SELECT_CCMASK.
8052   if (CompareLHS->getOpcode() == SystemZISD::SELECT_CCMASK) {
8053     // Verify that we have an appropriate mask for an EQ or NE comparison.
8054     bool Invert = false;
8055     if (CCMask == SystemZ::CCMASK_CMP_NE)
8056       Invert = !Invert;
8057     else if (CCMask != SystemZ::CCMASK_CMP_EQ)
8058       return false;
8059 
8060     // Verify that the ICMP compares against one of the select values.
8061     auto *TrueVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(0));
8062     if (!TrueVal)
8063       return false;
8064     auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
8065     if (!FalseVal)
8066       return false;
8067     if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
8068       Invert = !Invert;
8069     else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
8070       return false;
8071 
8072     // Compute the effective CC mask for the new branch or select.
8073     auto *NewCCValid = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(2));
8074     auto *NewCCMask = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(3));
8075     if (!NewCCValid || !NewCCMask)
8076       return false;
8077     CCValid = NewCCValid->getZExtValue();
8078     CCMask = NewCCMask->getZExtValue();
8079     if (Invert)
8080       CCMask ^= CCValid;
8081 
8082     // Return the updated CCReg link.
8083     CCReg = CompareLHS->getOperand(4);
8084     return true;
8085   }
8086 
8087   // Optimize the case where CompareLHS is (SRA (SHL (IPM))).
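  // This is the sequence used to materialize CC as a signed integer: IPM
  // places CC in bits 29:28 of its result (SystemZ::IPM_CC == 28) and the
  // SHL/SRA pair sign-extends that two-bit field, so an ICMP of the result
  // against zero can instead test CC directly, with the mask adjusted via
  // reverseCCMask() to compensate for the sign extension.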
8088 if (CompareLHS->getOpcode() == ISD::SRA) { 8089 auto *SRACount = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1)); 8090 if (!SRACount || SRACount->getZExtValue() != 30) 8091 return false; 8092 auto *SHL = CompareLHS->getOperand(0).getNode(); 8093 if (SHL->getOpcode() != ISD::SHL) 8094 return false; 8095 auto *SHLCount = dyn_cast<ConstantSDNode>(SHL->getOperand(1)); 8096 if (!SHLCount || SHLCount->getZExtValue() != 30 - SystemZ::IPM_CC) 8097 return false; 8098 auto *IPM = SHL->getOperand(0).getNode(); 8099 if (IPM->getOpcode() != SystemZISD::IPM) 8100 return false; 8101 8102 // Avoid introducing CC spills (because SRA would clobber CC). 8103 if (!CompareLHS->hasOneUse()) 8104 return false; 8105 // Verify that the ICMP compares against zero. 8106 if (CompareRHS->getZExtValue() != 0) 8107 return false; 8108 8109 // Compute the effective CC mask for the new branch or select. 8110 CCMask = SystemZ::reverseCCMask(CCMask); 8111 8112 // Return the updated CCReg link. 8113 CCReg = IPM->getOperand(0); 8114 return true; 8115 } 8116 8117 return false; 8118 } 8119 8120 SDValue SystemZTargetLowering::combineBR_CCMASK( 8121 SDNode *N, DAGCombinerInfo &DCI) const { 8122 SelectionDAG &DAG = DCI.DAG; 8123 8124 // Combine BR_CCMASK (ICMP (SELECT_CCMASK)) into a single BR_CCMASK. 8125 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8126 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); 8127 if (!CCValid || !CCMask) 8128 return SDValue(); 8129 8130 int CCValidVal = CCValid->getZExtValue(); 8131 int CCMaskVal = CCMask->getZExtValue(); 8132 SDValue Chain = N->getOperand(0); 8133 SDValue CCReg = N->getOperand(4); 8134 8135 if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) 8136 return DAG.getNode(SystemZISD::BR_CCMASK, SDLoc(N), N->getValueType(0), 8137 Chain, 8138 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), 8139 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), 8140 N->getOperand(3), CCReg); 8141 return SDValue(); 8142 } 8143 8144 SDValue SystemZTargetLowering::combineSELECT_CCMASK( 8145 SDNode *N, DAGCombinerInfo &DCI) const { 8146 SelectionDAG &DAG = DCI.DAG; 8147 8148 // Combine SELECT_CCMASK (ICMP (SELECT_CCMASK)) into a single SELECT_CCMASK. 
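  // As in combineBR_CCMASK above, combineCCMask() may rewrite the CC operand
  // and the CCValid/CCMask constants so that the SELECT_CCMASK tests the
  // original condition code directly.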
8149 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(2)); 8150 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(3)); 8151 if (!CCValid || !CCMask) 8152 return SDValue(); 8153 8154 int CCValidVal = CCValid->getZExtValue(); 8155 int CCMaskVal = CCMask->getZExtValue(); 8156 SDValue CCReg = N->getOperand(4); 8157 8158 if (combineCCMask(CCReg, CCValidVal, CCMaskVal)) 8159 return DAG.getNode(SystemZISD::SELECT_CCMASK, SDLoc(N), N->getValueType(0), 8160 N->getOperand(0), N->getOperand(1), 8161 DAG.getTargetConstant(CCValidVal, SDLoc(N), MVT::i32), 8162 DAG.getTargetConstant(CCMaskVal, SDLoc(N), MVT::i32), 8163 CCReg); 8164 return SDValue(); 8165 } 8166 8167 8168 SDValue SystemZTargetLowering::combineGET_CCMASK( 8169 SDNode *N, DAGCombinerInfo &DCI) const { 8170 8171 // Optimize away GET_CCMASK (SELECT_CCMASK) if the CC masks are compatible 8172 auto *CCValid = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8173 auto *CCMask = dyn_cast<ConstantSDNode>(N->getOperand(2)); 8174 if (!CCValid || !CCMask) 8175 return SDValue(); 8176 int CCValidVal = CCValid->getZExtValue(); 8177 int CCMaskVal = CCMask->getZExtValue(); 8178 8179 SDValue Select = N->getOperand(0); 8180 if (Select->getOpcode() == ISD::TRUNCATE) 8181 Select = Select->getOperand(0); 8182 if (Select->getOpcode() != SystemZISD::SELECT_CCMASK) 8183 return SDValue(); 8184 8185 auto *SelectCCValid = dyn_cast<ConstantSDNode>(Select->getOperand(2)); 8186 auto *SelectCCMask = dyn_cast<ConstantSDNode>(Select->getOperand(3)); 8187 if (!SelectCCValid || !SelectCCMask) 8188 return SDValue(); 8189 int SelectCCValidVal = SelectCCValid->getZExtValue(); 8190 int SelectCCMaskVal = SelectCCMask->getZExtValue(); 8191 8192 auto *TrueVal = dyn_cast<ConstantSDNode>(Select->getOperand(0)); 8193 auto *FalseVal = dyn_cast<ConstantSDNode>(Select->getOperand(1)); 8194 if (!TrueVal || !FalseVal) 8195 return SDValue(); 8196 if (TrueVal->getZExtValue() == 1 && FalseVal->getZExtValue() == 0) 8197 ; 8198 else if (TrueVal->getZExtValue() == 0 && FalseVal->getZExtValue() == 1) 8199 SelectCCMaskVal ^= SelectCCValidVal; 8200 else 8201 return SDValue(); 8202 8203 if (SelectCCValidVal & ~CCValidVal) 8204 return SDValue(); 8205 if (SelectCCMaskVal != (CCMaskVal & SelectCCValidVal)) 8206 return SDValue(); 8207 8208 return Select->getOperand(4); 8209 } 8210 8211 SDValue SystemZTargetLowering::combineIntDIVREM( 8212 SDNode *N, DAGCombinerInfo &DCI) const { 8213 SelectionDAG &DAG = DCI.DAG; 8214 EVT VT = N->getValueType(0); 8215 // In the case where the divisor is a vector of constants a cheaper 8216 // sequence of instructions can replace the divide. BuildSDIV is called to 8217 // do this during DAG combining, but it only succeeds when it can build a 8218 // multiplication node. The only option for SystemZ is ISD::SMUL_LOHI, and 8219 // since it is not Legal but Custom it can only happen before 8220 // legalization. Therefore we must scalarize this early before Combine 8221 // 1. For widened vectors, this is already the result of type legalization. 
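  // For example, an sdiv of a legal v4i32 by a constant splat is unrolled
  // here into scalar divisions, which the generic combiner can then turn
  // into cheaper multiply-high/shift sequences via BuildSDIV.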
8222 if (DCI.Level == BeforeLegalizeTypes && VT.isVector() && isTypeLegal(VT) && 8223 DAG.isConstantIntBuildVectorOrConstantInt(N->getOperand(1))) 8224 return DAG.UnrollVectorOp(N); 8225 return SDValue(); 8226 } 8227 8228 SDValue SystemZTargetLowering::combineINTRINSIC( 8229 SDNode *N, DAGCombinerInfo &DCI) const { 8230 SelectionDAG &DAG = DCI.DAG; 8231 8232 unsigned Id = N->getConstantOperandVal(1); 8233 switch (Id) { 8234 // VECTOR LOAD (RIGHTMOST) WITH LENGTH with a length operand of 15 8235 // or larger is simply a vector load. 8236 case Intrinsic::s390_vll: 8237 case Intrinsic::s390_vlrl: 8238 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) 8239 if (C->getZExtValue() >= 15) 8240 return DAG.getLoad(N->getValueType(0), SDLoc(N), N->getOperand(0), 8241 N->getOperand(3), MachinePointerInfo()); 8242 break; 8243 // Likewise for VECTOR STORE (RIGHTMOST) WITH LENGTH. 8244 case Intrinsic::s390_vstl: 8245 case Intrinsic::s390_vstrl: 8246 if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(3))) 8247 if (C->getZExtValue() >= 15) 8248 return DAG.getStore(N->getOperand(0), SDLoc(N), N->getOperand(2), 8249 N->getOperand(4), MachinePointerInfo()); 8250 break; 8251 } 8252 8253 return SDValue(); 8254 } 8255 8256 SDValue SystemZTargetLowering::unwrapAddress(SDValue N) const { 8257 if (N->getOpcode() == SystemZISD::PCREL_WRAPPER) 8258 return N->getOperand(0); 8259 return N; 8260 } 8261 8262 SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, 8263 DAGCombinerInfo &DCI) const { 8264 switch(N->getOpcode()) { 8265 default: break; 8266 case ISD::ZERO_EXTEND: return combineZERO_EXTEND(N, DCI); 8267 case ISD::SIGN_EXTEND: return combineSIGN_EXTEND(N, DCI); 8268 case ISD::SIGN_EXTEND_INREG: return combineSIGN_EXTEND_INREG(N, DCI); 8269 case SystemZISD::MERGE_HIGH: 8270 case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI); 8271 case ISD::LOAD: return combineLOAD(N, DCI); 8272 case ISD::STORE: return combineSTORE(N, DCI); 8273 case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI); 8274 case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI); 8275 case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI); 8276 case ISD::STRICT_FP_ROUND: 8277 case ISD::FP_ROUND: return combineFP_ROUND(N, DCI); 8278 case ISD::STRICT_FP_EXTEND: 8279 case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI); 8280 case ISD::SINT_TO_FP: 8281 case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI); 8282 case ISD::BSWAP: return combineBSWAP(N, DCI); 8283 case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI); 8284 case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI); 8285 case SystemZISD::GET_CCMASK: return combineGET_CCMASK(N, DCI); 8286 case ISD::SDIV: 8287 case ISD::UDIV: 8288 case ISD::SREM: 8289 case ISD::UREM: return combineIntDIVREM(N, DCI); 8290 case ISD::INTRINSIC_W_CHAIN: 8291 case ISD::INTRINSIC_VOID: return combineINTRINSIC(N, DCI); 8292 } 8293 8294 return SDValue(); 8295 } 8296 8297 // Return the demanded elements for the OpNo source operand of Op. DemandedElts 8298 // are for Op. 8299 static APInt getDemandedSrcElements(SDValue Op, const APInt &DemandedElts, 8300 unsigned OpNo) { 8301 EVT VT = Op.getValueType(); 8302 unsigned NumElts = (VT.isVector() ? 
VT.getVectorNumElements() : 1); 8303 APInt SrcDemE; 8304 unsigned Opcode = Op.getOpcode(); 8305 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 8306 unsigned Id = Op.getConstantOperandVal(0); 8307 switch (Id) { 8308 case Intrinsic::s390_vpksh: // PACKS 8309 case Intrinsic::s390_vpksf: 8310 case Intrinsic::s390_vpksg: 8311 case Intrinsic::s390_vpkshs: // PACKS_CC 8312 case Intrinsic::s390_vpksfs: 8313 case Intrinsic::s390_vpksgs: 8314 case Intrinsic::s390_vpklsh: // PACKLS 8315 case Intrinsic::s390_vpklsf: 8316 case Intrinsic::s390_vpklsg: 8317 case Intrinsic::s390_vpklshs: // PACKLS_CC 8318 case Intrinsic::s390_vpklsfs: 8319 case Intrinsic::s390_vpklsgs: 8320 // VECTOR PACK truncates the elements of two source vectors into one. 8321 SrcDemE = DemandedElts; 8322 if (OpNo == 2) 8323 SrcDemE.lshrInPlace(NumElts / 2); 8324 SrcDemE = SrcDemE.trunc(NumElts / 2); 8325 break; 8326 // VECTOR UNPACK extends half the elements of the source vector. 8327 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 8328 case Intrinsic::s390_vuphh: 8329 case Intrinsic::s390_vuphf: 8330 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH 8331 case Intrinsic::s390_vuplhh: 8332 case Intrinsic::s390_vuplhf: 8333 SrcDemE = APInt(NumElts * 2, 0); 8334 SrcDemE.insertBits(DemandedElts, 0); 8335 break; 8336 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 8337 case Intrinsic::s390_vuplhw: 8338 case Intrinsic::s390_vuplf: 8339 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW 8340 case Intrinsic::s390_vupllh: 8341 case Intrinsic::s390_vupllf: 8342 SrcDemE = APInt(NumElts * 2, 0); 8343 SrcDemE.insertBits(DemandedElts, NumElts); 8344 break; 8345 case Intrinsic::s390_vpdi: { 8346 // VECTOR PERMUTE DWORD IMMEDIATE selects one element from each source. 8347 SrcDemE = APInt(NumElts, 0); 8348 if (!DemandedElts[OpNo - 1]) 8349 break; 8350 unsigned Mask = Op.getConstantOperandVal(3); 8351 unsigned MaskBit = ((OpNo - 1) ? 1 : 4); 8352 // Demand input element 0 or 1, given by the mask bit value. 8353 SrcDemE.setBit((Mask & MaskBit)? 1 : 0); 8354 break; 8355 } 8356 case Intrinsic::s390_vsldb: { 8357 // VECTOR SHIFT LEFT DOUBLE BY BYTE 8358 assert(VT == MVT::v16i8 && "Unexpected type."); 8359 unsigned FirstIdx = Op.getConstantOperandVal(3); 8360 assert (FirstIdx > 0 && FirstIdx < 16 && "Unused operand."); 8361 unsigned NumSrc0Els = 16 - FirstIdx; 8362 SrcDemE = APInt(NumElts, 0); 8363 if (OpNo == 1) { 8364 APInt DemEls = DemandedElts.trunc(NumSrc0Els); 8365 SrcDemE.insertBits(DemEls, FirstIdx); 8366 } else { 8367 APInt DemEls = DemandedElts.lshr(NumSrc0Els); 8368 SrcDemE.insertBits(DemEls, 0); 8369 } 8370 break; 8371 } 8372 case Intrinsic::s390_vperm: 8373 SrcDemE = APInt::getAllOnes(NumElts); 8374 break; 8375 default: 8376 llvm_unreachable("Unhandled intrinsic."); 8377 break; 8378 } 8379 } else { 8380 switch (Opcode) { 8381 case SystemZISD::JOIN_DWORDS: 8382 // Scalar operand. 
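      // (JOIN_DWORDS takes two i64 scalars, so a one-bit demanded-elements
      //  mask per operand is all that is needed here.)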
8383 SrcDemE = APInt(1, 1); 8384 break; 8385 case SystemZISD::SELECT_CCMASK: 8386 SrcDemE = DemandedElts; 8387 break; 8388 default: 8389 llvm_unreachable("Unhandled opcode."); 8390 break; 8391 } 8392 } 8393 return SrcDemE; 8394 } 8395 8396 static void computeKnownBitsBinOp(const SDValue Op, KnownBits &Known, 8397 const APInt &DemandedElts, 8398 const SelectionDAG &DAG, unsigned Depth, 8399 unsigned OpNo) { 8400 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); 8401 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); 8402 KnownBits LHSKnown = 8403 DAG.computeKnownBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); 8404 KnownBits RHSKnown = 8405 DAG.computeKnownBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); 8406 Known = LHSKnown.intersectWith(RHSKnown); 8407 } 8408 8409 void 8410 SystemZTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, 8411 KnownBits &Known, 8412 const APInt &DemandedElts, 8413 const SelectionDAG &DAG, 8414 unsigned Depth) const { 8415 Known.resetAll(); 8416 8417 // Intrinsic CC result is returned in the two low bits. 8418 unsigned tmp0, tmp1; // not used 8419 if (Op.getResNo() == 1 && isIntrinsicWithCC(Op, tmp0, tmp1)) { 8420 Known.Zero.setBitsFrom(2); 8421 return; 8422 } 8423 EVT VT = Op.getValueType(); 8424 if (Op.getResNo() != 0 || VT == MVT::Untyped) 8425 return; 8426 assert (Known.getBitWidth() == VT.getScalarSizeInBits() && 8427 "KnownBits does not match VT in bitwidth"); 8428 assert ((!VT.isVector() || 8429 (DemandedElts.getBitWidth() == VT.getVectorNumElements())) && 8430 "DemandedElts does not match VT number of elements"); 8431 unsigned BitWidth = Known.getBitWidth(); 8432 unsigned Opcode = Op.getOpcode(); 8433 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 8434 bool IsLogical = false; 8435 unsigned Id = Op.getConstantOperandVal(0); 8436 switch (Id) { 8437 case Intrinsic::s390_vpksh: // PACKS 8438 case Intrinsic::s390_vpksf: 8439 case Intrinsic::s390_vpksg: 8440 case Intrinsic::s390_vpkshs: // PACKS_CC 8441 case Intrinsic::s390_vpksfs: 8442 case Intrinsic::s390_vpksgs: 8443 case Intrinsic::s390_vpklsh: // PACKLS 8444 case Intrinsic::s390_vpklsf: 8445 case Intrinsic::s390_vpklsg: 8446 case Intrinsic::s390_vpklshs: // PACKLS_CC 8447 case Intrinsic::s390_vpklsfs: 8448 case Intrinsic::s390_vpklsgs: 8449 case Intrinsic::s390_vpdi: 8450 case Intrinsic::s390_vsldb: 8451 case Intrinsic::s390_vperm: 8452 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 1); 8453 break; 8454 case Intrinsic::s390_vuplhb: // VECTOR UNPACK LOGICAL HIGH 8455 case Intrinsic::s390_vuplhh: 8456 case Intrinsic::s390_vuplhf: 8457 case Intrinsic::s390_vupllb: // VECTOR UNPACK LOGICAL LOW 8458 case Intrinsic::s390_vupllh: 8459 case Intrinsic::s390_vupllf: 8460 IsLogical = true; 8461 [[fallthrough]]; 8462 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 8463 case Intrinsic::s390_vuphh: 8464 case Intrinsic::s390_vuphf: 8465 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 8466 case Intrinsic::s390_vuplhw: 8467 case Intrinsic::s390_vuplf: { 8468 SDValue SrcOp = Op.getOperand(1); 8469 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 0); 8470 Known = DAG.computeKnownBits(SrcOp, SrcDemE, Depth + 1); 8471 if (IsLogical) { 8472 Known = Known.zext(BitWidth); 8473 } else 8474 Known = Known.sext(BitWidth); 8475 break; 8476 } 8477 default: 8478 break; 8479 } 8480 } else { 8481 switch (Opcode) { 8482 case SystemZISD::JOIN_DWORDS: 8483 case SystemZISD::SELECT_CCMASK: 8484 computeKnownBitsBinOp(Op, Known, DemandedElts, DAG, Depth, 0); 8485 break; 8486 case 
SystemZISD::REPLICATE: { 8487 SDValue SrcOp = Op.getOperand(0); 8488 Known = DAG.computeKnownBits(SrcOp, Depth + 1); 8489 if (Known.getBitWidth() < BitWidth && isa<ConstantSDNode>(SrcOp)) 8490 Known = Known.sext(BitWidth); // VREPI sign extends the immedate. 8491 break; 8492 } 8493 default: 8494 break; 8495 } 8496 } 8497 8498 // Known has the width of the source operand(s). Adjust if needed to match 8499 // the passed bitwidth. 8500 if (Known.getBitWidth() != BitWidth) 8501 Known = Known.anyextOrTrunc(BitWidth); 8502 } 8503 8504 static unsigned computeNumSignBitsBinOp(SDValue Op, const APInt &DemandedElts, 8505 const SelectionDAG &DAG, unsigned Depth, 8506 unsigned OpNo) { 8507 APInt Src0DemE = getDemandedSrcElements(Op, DemandedElts, OpNo); 8508 unsigned LHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo), Src0DemE, Depth + 1); 8509 if (LHS == 1) return 1; // Early out. 8510 APInt Src1DemE = getDemandedSrcElements(Op, DemandedElts, OpNo + 1); 8511 unsigned RHS = DAG.ComputeNumSignBits(Op.getOperand(OpNo + 1), Src1DemE, Depth + 1); 8512 if (RHS == 1) return 1; // Early out. 8513 unsigned Common = std::min(LHS, RHS); 8514 unsigned SrcBitWidth = Op.getOperand(OpNo).getScalarValueSizeInBits(); 8515 EVT VT = Op.getValueType(); 8516 unsigned VTBits = VT.getScalarSizeInBits(); 8517 if (SrcBitWidth > VTBits) { // PACK 8518 unsigned SrcExtraBits = SrcBitWidth - VTBits; 8519 if (Common > SrcExtraBits) 8520 return (Common - SrcExtraBits); 8521 return 1; 8522 } 8523 assert (SrcBitWidth == VTBits && "Expected operands of same bitwidth."); 8524 return Common; 8525 } 8526 8527 unsigned 8528 SystemZTargetLowering::ComputeNumSignBitsForTargetNode( 8529 SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 8530 unsigned Depth) const { 8531 if (Op.getResNo() != 0) 8532 return 1; 8533 unsigned Opcode = Op.getOpcode(); 8534 if (Opcode == ISD::INTRINSIC_WO_CHAIN) { 8535 unsigned Id = Op.getConstantOperandVal(0); 8536 switch (Id) { 8537 case Intrinsic::s390_vpksh: // PACKS 8538 case Intrinsic::s390_vpksf: 8539 case Intrinsic::s390_vpksg: 8540 case Intrinsic::s390_vpkshs: // PACKS_CC 8541 case Intrinsic::s390_vpksfs: 8542 case Intrinsic::s390_vpksgs: 8543 case Intrinsic::s390_vpklsh: // PACKLS 8544 case Intrinsic::s390_vpklsf: 8545 case Intrinsic::s390_vpklsg: 8546 case Intrinsic::s390_vpklshs: // PACKLS_CC 8547 case Intrinsic::s390_vpklsfs: 8548 case Intrinsic::s390_vpklsgs: 8549 case Intrinsic::s390_vpdi: 8550 case Intrinsic::s390_vsldb: 8551 case Intrinsic::s390_vperm: 8552 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 1); 8553 case Intrinsic::s390_vuphb: // VECTOR UNPACK HIGH 8554 case Intrinsic::s390_vuphh: 8555 case Intrinsic::s390_vuphf: 8556 case Intrinsic::s390_vuplb: // VECTOR UNPACK LOW 8557 case Intrinsic::s390_vuplhw: 8558 case Intrinsic::s390_vuplf: { 8559 SDValue PackedOp = Op.getOperand(1); 8560 APInt SrcDemE = getDemandedSrcElements(Op, DemandedElts, 1); 8561 unsigned Tmp = DAG.ComputeNumSignBits(PackedOp, SrcDemE, Depth + 1); 8562 EVT VT = Op.getValueType(); 8563 unsigned VTBits = VT.getScalarSizeInBits(); 8564 Tmp += VTBits - PackedOp.getScalarValueSizeInBits(); 8565 return Tmp; 8566 } 8567 default: 8568 break; 8569 } 8570 } else { 8571 switch (Opcode) { 8572 case SystemZISD::SELECT_CCMASK: 8573 return computeNumSignBitsBinOp(Op, DemandedElts, DAG, Depth, 0); 8574 default: 8575 break; 8576 } 8577 } 8578 8579 return 1; 8580 } 8581 8582 bool SystemZTargetLowering:: 8583 isGuaranteedNotToBeUndefOrPoisonForTargetNode(SDValue Op, 8584 const APInt &DemandedElts, const 
SelectionDAG &DAG, 8585 bool PoisonOnly, unsigned Depth) const { 8586 switch (Op->getOpcode()) { 8587 case SystemZISD::PCREL_WRAPPER: 8588 case SystemZISD::PCREL_OFFSET: 8589 return true; 8590 } 8591 return false; 8592 } 8593 8594 unsigned 8595 SystemZTargetLowering::getStackProbeSize(const MachineFunction &MF) const { 8596 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 8597 unsigned StackAlign = TFI->getStackAlignment(); 8598 assert(StackAlign >=1 && isPowerOf2_32(StackAlign) && 8599 "Unexpected stack alignment"); 8600 // The default stack probe size is 4096 if the function has no 8601 // stack-probe-size attribute. 8602 unsigned StackProbeSize = 8603 MF.getFunction().getFnAttributeAsParsedInteger("stack-probe-size", 4096); 8604 // Round down to the stack alignment. 8605 StackProbeSize &= ~(StackAlign - 1); 8606 return StackProbeSize ? StackProbeSize : StackAlign; 8607 } 8608 8609 //===----------------------------------------------------------------------===// 8610 // Custom insertion 8611 //===----------------------------------------------------------------------===// 8612 8613 // Force base value Base into a register before MI. Return the register. 8614 static Register forceReg(MachineInstr &MI, MachineOperand &Base, 8615 const SystemZInstrInfo *TII) { 8616 MachineBasicBlock *MBB = MI.getParent(); 8617 MachineFunction &MF = *MBB->getParent(); 8618 MachineRegisterInfo &MRI = MF.getRegInfo(); 8619 8620 if (Base.isReg()) { 8621 // Copy Base into a new virtual register to help register coalescing in 8622 // cases with multiple uses. 8623 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8624 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::COPY), Reg) 8625 .add(Base); 8626 return Reg; 8627 } 8628 8629 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 8630 BuildMI(*MBB, MI, MI.getDebugLoc(), TII->get(SystemZ::LA), Reg) 8631 .add(Base) 8632 .addImm(0) 8633 .addReg(0); 8634 return Reg; 8635 } 8636 8637 // The CC operand of MI might be missing a kill marker because there 8638 // were multiple uses of CC, and ISel didn't know which to mark. 8639 // Figure out whether MI should have had a kill marker. 8640 static bool checkCCKill(MachineInstr &MI, MachineBasicBlock *MBB) { 8641 // Scan forward through BB for a use/def of CC. 8642 MachineBasicBlock::iterator miI(std::next(MachineBasicBlock::iterator(MI))); 8643 for (MachineBasicBlock::iterator miE = MBB->end(); miI != miE; ++miI) { 8644 const MachineInstr& mi = *miI; 8645 if (mi.readsRegister(SystemZ::CC, /*TRI=*/nullptr)) 8646 return false; 8647 if (mi.definesRegister(SystemZ::CC, /*TRI=*/nullptr)) 8648 break; // Should have kill-flag - update below. 8649 } 8650 8651 // If we hit the end of the block, check whether CC is live into a 8652 // successor. 8653 if (miI == MBB->end()) { 8654 for (const MachineBasicBlock *Succ : MBB->successors()) 8655 if (Succ->isLiveIn(SystemZ::CC)) 8656 return false; 8657 } 8658 8659 return true; 8660 } 8661 8662 // Return true if it is OK for this Select pseudo-opcode to be cascaded 8663 // together with other Select pseudo-opcodes into a single basic-block with 8664 // a conditional jump around it. 
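// All of these pseudos read CC (their CCValid and CCMask operands say which
// test to perform) and none of them clobber it, which is what allows
// emitSelect() to expand a whole run of them around a single branch.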
8665 static bool isSelectPseudo(MachineInstr &MI) { 8666 switch (MI.getOpcode()) { 8667 case SystemZ::Select32: 8668 case SystemZ::Select64: 8669 case SystemZ::Select128: 8670 case SystemZ::SelectF32: 8671 case SystemZ::SelectF64: 8672 case SystemZ::SelectF128: 8673 case SystemZ::SelectVR32: 8674 case SystemZ::SelectVR64: 8675 case SystemZ::SelectVR128: 8676 return true; 8677 8678 default: 8679 return false; 8680 } 8681 } 8682 8683 // Helper function, which inserts PHI functions into SinkMBB: 8684 // %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ], 8685 // where %FalseValue(i) and %TrueValue(i) are taken from Selects. 8686 static void createPHIsForSelects(SmallVector<MachineInstr*, 8> &Selects, 8687 MachineBasicBlock *TrueMBB, 8688 MachineBasicBlock *FalseMBB, 8689 MachineBasicBlock *SinkMBB) { 8690 MachineFunction *MF = TrueMBB->getParent(); 8691 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); 8692 8693 MachineInstr *FirstMI = Selects.front(); 8694 unsigned CCValid = FirstMI->getOperand(3).getImm(); 8695 unsigned CCMask = FirstMI->getOperand(4).getImm(); 8696 8697 MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); 8698 8699 // As we are creating the PHIs, we have to be careful if there is more than 8700 // one. Later Selects may reference the results of earlier Selects, but later 8701 // PHIs have to reference the individual true/false inputs from earlier PHIs. 8702 // That also means that PHI construction must work forward from earlier to 8703 // later, and that the code must maintain a mapping from earlier PHI's 8704 // destination registers, and the registers that went into the PHI. 8705 DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; 8706 8707 for (auto *MI : Selects) { 8708 Register DestReg = MI->getOperand(0).getReg(); 8709 Register TrueReg = MI->getOperand(1).getReg(); 8710 Register FalseReg = MI->getOperand(2).getReg(); 8711 8712 // If this Select we are generating is the opposite condition from 8713 // the jump we generated, then we have to swap the operands for the 8714 // PHI that is going to be generated. 8715 if (MI->getOperand(4).getImm() == (CCValid ^ CCMask)) 8716 std::swap(TrueReg, FalseReg); 8717 8718 if (auto It = RegRewriteTable.find(TrueReg); It != RegRewriteTable.end()) 8719 TrueReg = It->second.first; 8720 8721 if (auto It = RegRewriteTable.find(FalseReg); It != RegRewriteTable.end()) 8722 FalseReg = It->second.second; 8723 8724 DebugLoc DL = MI->getDebugLoc(); 8725 BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(SystemZ::PHI), DestReg) 8726 .addReg(TrueReg).addMBB(TrueMBB) 8727 .addReg(FalseReg).addMBB(FalseMBB); 8728 8729 // Add this PHI to the rewrite table. 8730 RegRewriteTable[DestReg] = std::make_pair(TrueReg, FalseReg); 8731 } 8732 8733 MF->getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 8734 } 8735 8736 MachineBasicBlock * 8737 SystemZTargetLowering::emitAdjCallStack(MachineInstr &MI, 8738 MachineBasicBlock *BB) const { 8739 MachineFunction &MF = *BB->getParent(); 8740 MachineFrameInfo &MFI = MF.getFrameInfo(); 8741 auto *TFL = Subtarget.getFrameLowering<SystemZFrameLowering>(); 8742 assert(TFL->hasReservedCallFrame(MF) && 8743 "ADJSTACKDOWN and ADJSTACKUP should be no-ops"); 8744 (void)TFL; 8745 // Get the MaxCallFrameSize value and erase MI since it serves no further 8746 // purpose as the call frame is statically reserved in the prolog. Set 8747 // AdjustsStack as MI is *not* mapped as a frame instruction. 
8748 uint32_t NumBytes = MI.getOperand(0).getImm(); 8749 if (NumBytes > MFI.getMaxCallFrameSize()) 8750 MFI.setMaxCallFrameSize(NumBytes); 8751 MFI.setAdjustsStack(true); 8752 8753 MI.eraseFromParent(); 8754 return BB; 8755 } 8756 8757 // Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI. 8758 MachineBasicBlock * 8759 SystemZTargetLowering::emitSelect(MachineInstr &MI, 8760 MachineBasicBlock *MBB) const { 8761 assert(isSelectPseudo(MI) && "Bad call to emitSelect()"); 8762 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8763 8764 unsigned CCValid = MI.getOperand(3).getImm(); 8765 unsigned CCMask = MI.getOperand(4).getImm(); 8766 8767 // If we have a sequence of Select* pseudo instructions using the 8768 // same condition code value, we want to expand all of them into 8769 // a single pair of basic blocks using the same condition. 8770 SmallVector<MachineInstr*, 8> Selects; 8771 SmallVector<MachineInstr*, 8> DbgValues; 8772 Selects.push_back(&MI); 8773 unsigned Count = 0; 8774 for (MachineInstr &NextMI : llvm::make_range( 8775 std::next(MachineBasicBlock::iterator(MI)), MBB->end())) { 8776 if (isSelectPseudo(NextMI)) { 8777 assert(NextMI.getOperand(3).getImm() == CCValid && 8778 "Bad CCValid operands since CC was not redefined."); 8779 if (NextMI.getOperand(4).getImm() == CCMask || 8780 NextMI.getOperand(4).getImm() == (CCValid ^ CCMask)) { 8781 Selects.push_back(&NextMI); 8782 continue; 8783 } 8784 break; 8785 } 8786 if (NextMI.definesRegister(SystemZ::CC, /*TRI=*/nullptr) || 8787 NextMI.usesCustomInsertionHook()) 8788 break; 8789 bool User = false; 8790 for (auto *SelMI : Selects) 8791 if (NextMI.readsVirtualRegister(SelMI->getOperand(0).getReg())) { 8792 User = true; 8793 break; 8794 } 8795 if (NextMI.isDebugInstr()) { 8796 if (User) { 8797 assert(NextMI.isDebugValue() && "Unhandled debug opcode."); 8798 DbgValues.push_back(&NextMI); 8799 } 8800 } else if (User || ++Count > 20) 8801 break; 8802 } 8803 8804 MachineInstr *LastMI = Selects.back(); 8805 bool CCKilled = (LastMI->killsRegister(SystemZ::CC, /*TRI=*/nullptr) || 8806 checkCCKill(*LastMI, MBB)); 8807 MachineBasicBlock *StartMBB = MBB; 8808 MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(LastMI, MBB); 8809 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); 8810 8811 // Unless CC was killed in the last Select instruction, mark it as 8812 // live-in to both FalseMBB and JoinMBB. 8813 if (!CCKilled) { 8814 FalseMBB->addLiveIn(SystemZ::CC); 8815 JoinMBB->addLiveIn(SystemZ::CC); 8816 } 8817 8818 // StartMBB: 8819 // BRC CCMask, JoinMBB 8820 // # fallthrough to FalseMBB 8821 MBB = StartMBB; 8822 BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC)) 8823 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 8824 MBB->addSuccessor(JoinMBB); 8825 MBB->addSuccessor(FalseMBB); 8826 8827 // FalseMBB: 8828 // # fallthrough to JoinMBB 8829 MBB = FalseMBB; 8830 MBB->addSuccessor(JoinMBB); 8831 8832 // JoinMBB: 8833 // %Result = phi [ %FalseReg, FalseMBB ], [ %TrueReg, StartMBB ] 8834 // ... 8835 MBB = JoinMBB; 8836 createPHIsForSelects(Selects, StartMBB, FalseMBB, MBB); 8837 for (auto *SelMI : Selects) 8838 SelMI->eraseFromParent(); 8839 8840 MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI(); 8841 for (auto *DbgMI : DbgValues) 8842 MBB->splice(InsertPos, StartMBB, DbgMI); 8843 8844 return JoinMBB; 8845 } 8846 8847 // Implement EmitInstrWithCustomInserter for pseudo CondStore* instruction MI. 
8848 // StoreOpcode is the store to use and Invert says whether the store should 8849 // happen when the condition is false rather than true. If a STORE ON 8850 // CONDITION is available, STOCOpcode is its opcode, otherwise it is 0. 8851 MachineBasicBlock *SystemZTargetLowering::emitCondStore(MachineInstr &MI, 8852 MachineBasicBlock *MBB, 8853 unsigned StoreOpcode, 8854 unsigned STOCOpcode, 8855 bool Invert) const { 8856 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8857 8858 Register SrcReg = MI.getOperand(0).getReg(); 8859 MachineOperand Base = MI.getOperand(1); 8860 int64_t Disp = MI.getOperand(2).getImm(); 8861 Register IndexReg = MI.getOperand(3).getReg(); 8862 unsigned CCValid = MI.getOperand(4).getImm(); 8863 unsigned CCMask = MI.getOperand(5).getImm(); 8864 DebugLoc DL = MI.getDebugLoc(); 8865 8866 StoreOpcode = TII->getOpcodeForOffset(StoreOpcode, Disp); 8867 8868 // ISel pattern matching also adds a load memory operand of the same 8869 // address, so take special care to find the storing memory operand. 8870 MachineMemOperand *MMO = nullptr; 8871 for (auto *I : MI.memoperands()) 8872 if (I->isStore()) { 8873 MMO = I; 8874 break; 8875 } 8876 8877 // Use STOCOpcode if possible. We could use different store patterns in 8878 // order to avoid matching the index register, but the performance trade-offs 8879 // might be more complicated in that case. 8880 if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { 8881 if (Invert) 8882 CCMask ^= CCValid; 8883 8884 BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) 8885 .addReg(SrcReg) 8886 .add(Base) 8887 .addImm(Disp) 8888 .addImm(CCValid) 8889 .addImm(CCMask) 8890 .addMemOperand(MMO); 8891 8892 MI.eraseFromParent(); 8893 return MBB; 8894 } 8895 8896 // Get the condition needed to branch around the store. 8897 if (!Invert) 8898 CCMask ^= CCValid; 8899 8900 MachineBasicBlock *StartMBB = MBB; 8901 MachineBasicBlock *JoinMBB = SystemZ::splitBlockBefore(MI, MBB); 8902 MachineBasicBlock *FalseMBB = SystemZ::emitBlockAfter(StartMBB); 8903 8904 // Unless CC was killed in the CondStore instruction, mark it as 8905 // live-in to both FalseMBB and JoinMBB. 8906 if (!MI.killsRegister(SystemZ::CC, /*TRI=*/nullptr) && 8907 !checkCCKill(MI, JoinMBB)) { 8908 FalseMBB->addLiveIn(SystemZ::CC); 8909 JoinMBB->addLiveIn(SystemZ::CC); 8910 } 8911 8912 // StartMBB: 8913 // BRC CCMask, JoinMBB 8914 // # fallthrough to FalseMBB 8915 MBB = StartMBB; 8916 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 8917 .addImm(CCValid).addImm(CCMask).addMBB(JoinMBB); 8918 MBB->addSuccessor(JoinMBB); 8919 MBB->addSuccessor(FalseMBB); 8920 8921 // FalseMBB: 8922 // store %SrcReg, %Disp(%Index,%Base) 8923 // # fallthrough to JoinMBB 8924 MBB = FalseMBB; 8925 BuildMI(MBB, DL, TII->get(StoreOpcode)) 8926 .addReg(SrcReg) 8927 .add(Base) 8928 .addImm(Disp) 8929 .addReg(IndexReg) 8930 .addMemOperand(MMO); 8931 MBB->addSuccessor(JoinMBB); 8932 8933 MI.eraseFromParent(); 8934 return JoinMBB; 8935 } 8936 8937 // Implement EmitInstrWithCustomInserter for pseudo [SU]Cmp128Hi instruction MI. 8938 MachineBasicBlock * 8939 SystemZTargetLowering::emitICmp128Hi(MachineInstr &MI, 8940 MachineBasicBlock *MBB, 8941 bool Unsigned) const { 8942 MachineFunction &MF = *MBB->getParent(); 8943 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 8944 MachineRegisterInfo &MRI = MF.getRegInfo(); 8945 8946 // Synthetic instruction to compare 128-bit values. 8947 // Sets CC 1 if Op0 > Op1, sets a different CC otherwise. 
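  // The comparison is done in two steps: the high 64-bit halves are compared
  // first (with the operands swapped, see below), and only if they are equal
  // do we fall through and compare the low halves with VCHLGS.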
8948   Register Op0 = MI.getOperand(0).getReg();
8949   Register Op1 = MI.getOperand(1).getReg();
8950 
8951   MachineBasicBlock *StartMBB = MBB;
8952   MachineBasicBlock *JoinMBB = SystemZ::splitBlockAfter(MI, MBB);
8953   MachineBasicBlock *HiEqMBB = SystemZ::emitBlockAfter(StartMBB);
8954 
8955   // StartMBB:
8956   //
8957   //  Use VECTOR ELEMENT COMPARE [LOGICAL] to compare the high parts.
8958   //  Swap the inputs to get:
8959   //    CC 1 if high(Op0) > high(Op1)
8960   //    CC 2 if high(Op0) < high(Op1)
8961   //    CC 0 if high(Op0) == high(Op1)
8962   //
8963   //  If CC != 0, we're done, so jump over the next instruction.
8964   //
8965   //   VEC[L]G Op1, Op0
8966   //   JNE JoinMBB
8967   //   # fallthrough to HiEqMBB
8968   MBB = StartMBB;
8969   int HiOpcode = Unsigned? SystemZ::VECLG : SystemZ::VECG;
8970   BuildMI(MBB, MI.getDebugLoc(), TII->get(HiOpcode))
8971     .addReg(Op1).addReg(Op0);
8972   BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::BRC))
8973     .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE).addMBB(JoinMBB);
8974   MBB->addSuccessor(JoinMBB);
8975   MBB->addSuccessor(HiEqMBB);
8976 
8977   // HiEqMBB:
8978   //
8979   //  Otherwise, use VECTOR COMPARE HIGH LOGICAL.
8980   //  Since we already know the high parts are equal, the CC
8981   //  result will only depend on the low parts:
8982   //     CC 1 if low(Op0) > low(Op1)
8983   //     CC 3 if low(Op0) <= low(Op1)
8984   //
8985   //   VCHLGS Tmp, Op0, Op1
8986   //   # fallthrough to JoinMBB
8987   MBB = HiEqMBB;
8988   Register Temp = MRI.createVirtualRegister(&SystemZ::VR128BitRegClass);
8989   BuildMI(MBB, MI.getDebugLoc(), TII->get(SystemZ::VCHLGS), Temp)
8990     .addReg(Op0).addReg(Op1);
8991   MBB->addSuccessor(JoinMBB);
8992 
8993   // Mark CC as live-in to JoinMBB.
8994   JoinMBB->addLiveIn(SystemZ::CC);
8995 
8996   MI.eraseFromParent();
8997   return JoinMBB;
8998 }
8999 
9000 // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_LOADW_* or
9001 // ATOMIC_SWAPW instruction MI. BinOpcode is the instruction that performs
9002 // the binary operation elided by "*", or 0 for ATOMIC_SWAPW. Invert says
9003 // whether the field should be inverted after performing BinOpcode (e.g. for
9004 // NAND).
9005 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadBinary(
9006     MachineInstr &MI, MachineBasicBlock *MBB, unsigned BinOpcode,
9007     bool Invert) const {
9008   MachineFunction &MF = *MBB->getParent();
9009   const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9010   MachineRegisterInfo &MRI = MF.getRegInfo();
9011 
9012   // Extract the operands. Base can be a register or a frame index.
9013   // Src2 can be a register or immediate.
9014   Register Dest = MI.getOperand(0).getReg();
9015   MachineOperand Base = earlyUseOperand(MI.getOperand(1));
9016   int64_t Disp = MI.getOperand(2).getImm();
9017   MachineOperand Src2 = earlyUseOperand(MI.getOperand(3));
9018   Register BitShift = MI.getOperand(4).getReg();
9019   Register NegBitShift = MI.getOperand(5).getReg();
9020   unsigned BitSize = MI.getOperand(6).getImm();
9021   DebugLoc DL = MI.getDebugLoc();
9022 
9023   // Get the right opcodes for the displacement.
9024   unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp);
9025   unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp);
9026   assert(LOpcode && CSOpcode && "Displacement out of range");
9027 
9028   // Create virtual registers for temporary results.
9029   Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9030   Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9031   Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9032   Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9033   Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9034 
9035   // Insert a basic block for the main loop.
9036   MachineBasicBlock *StartMBB = MBB;
9037   MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB);
9038   MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB);
9039 
9040   // StartMBB:
9041   //   ...
9042   //   %OrigVal = L Disp(%Base)
9043   //   # fall through to LoopMBB
9044   MBB = StartMBB;
9045   BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0);
9046   MBB->addSuccessor(LoopMBB);
9047 
9048   // LoopMBB:
9049   //   %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, LoopMBB ]
9050   //   %RotatedOldVal = RLL %OldVal, 0(%BitShift)
9051   //   %RotatedNewVal = OP %RotatedOldVal, %Src2
9052   //   %NewVal = RLL %RotatedNewVal, 0(%NegBitShift)
9053   //   %Dest = CS %OldVal, %NewVal, Disp(%Base)
9054   //   JNE LoopMBB
9055   //   # fall through to DoneMBB
9056   MBB = LoopMBB;
9057   BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal)
9058     .addReg(OrigVal).addMBB(StartMBB)
9059     .addReg(Dest).addMBB(LoopMBB);
9060   BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal)
9061     .addReg(OldVal).addReg(BitShift).addImm(0);
9062   if (Invert) {
9063     // Perform the operation normally and then invert every bit of the field.
9064     Register Tmp = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass);
9065     BuildMI(MBB, DL, TII->get(BinOpcode), Tmp).addReg(RotatedOldVal).add(Src2);
9066     // XILF with the upper BitSize bits set.
9067     BuildMI(MBB, DL, TII->get(SystemZ::XILF), RotatedNewVal)
9068       .addReg(Tmp).addImm(-1U << (32 - BitSize));
9069   } else if (BinOpcode)
9070     // A simple binary operation.
9071     BuildMI(MBB, DL, TII->get(BinOpcode), RotatedNewVal)
9072         .addReg(RotatedOldVal)
9073         .add(Src2);
9074   else
9075     // Use RISBG to rotate Src2 into position and use it to replace the
9076     // field in RotatedOldVal.
9077     BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedNewVal)
9078       .addReg(RotatedOldVal).addReg(Src2.getReg())
9079       .addImm(32).addImm(31 + BitSize).addImm(32 - BitSize);
9080   BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal)
9081     .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0);
9082   BuildMI(MBB, DL, TII->get(CSOpcode), Dest)
9083       .addReg(OldVal)
9084       .addReg(NewVal)
9085       .add(Base)
9086       .addImm(Disp);
9087   BuildMI(MBB, DL, TII->get(SystemZ::BRC))
9088     .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB);
9089   MBB->addSuccessor(LoopMBB);
9090   MBB->addSuccessor(DoneMBB);
9091 
9092   MI.eraseFromParent();
9093   return DoneMBB;
9094 }
9095 
9096 // Implement EmitInstrWithCustomInserter for subword pseudo
9097 // ATOMIC_LOADW_{,U}{MIN,MAX} instruction MI. CompareOpcode is the
9098 // instruction that should be used to compare the current field with the
9099 // minimum or maximum value. KeepOldMask is the BRC condition-code mask
9100 // for when the current field should be kept.
9101 MachineBasicBlock *SystemZTargetLowering::emitAtomicLoadMinMax(
9102     MachineInstr &MI, MachineBasicBlock *MBB, unsigned CompareOpcode,
9103     unsigned KeepOldMask) const {
9104   MachineFunction &MF = *MBB->getParent();
9105   const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
9106   MachineRegisterInfo &MRI = MF.getRegInfo();
9107 
9108   // Extract the operands.
Base can be a register or a frame index. 9109 Register Dest = MI.getOperand(0).getReg(); 9110 MachineOperand Base = earlyUseOperand(MI.getOperand(1)); 9111 int64_t Disp = MI.getOperand(2).getImm(); 9112 Register Src2 = MI.getOperand(3).getReg(); 9113 Register BitShift = MI.getOperand(4).getReg(); 9114 Register NegBitShift = MI.getOperand(5).getReg(); 9115 unsigned BitSize = MI.getOperand(6).getImm(); 9116 DebugLoc DL = MI.getDebugLoc(); 9117 9118 // Get the right opcodes for the displacement. 9119 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); 9120 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); 9121 assert(LOpcode && CSOpcode && "Displacement out of range"); 9122 9123 // Create virtual registers for temporary results. 9124 Register OrigVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 9125 Register OldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 9126 Register NewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 9127 Register RotatedOldVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 9128 Register RotatedAltVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 9129 Register RotatedNewVal = MRI.createVirtualRegister(&SystemZ::GR32BitRegClass); 9130 9131 // Insert 3 basic blocks for the loop. 9132 MachineBasicBlock *StartMBB = MBB; 9133 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 9134 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 9135 MachineBasicBlock *UseAltMBB = SystemZ::emitBlockAfter(LoopMBB); 9136 MachineBasicBlock *UpdateMBB = SystemZ::emitBlockAfter(UseAltMBB); 9137 9138 // StartMBB: 9139 // ... 9140 // %OrigVal = L Disp(%Base) 9141 // # fall through to LoopMBB 9142 MBB = StartMBB; 9143 BuildMI(MBB, DL, TII->get(LOpcode), OrigVal).add(Base).addImm(Disp).addReg(0); 9144 MBB->addSuccessor(LoopMBB); 9145 9146 // LoopMBB: 9147 // %OldVal = phi [ %OrigVal, StartMBB ], [ %Dest, UpdateMBB ] 9148 // %RotatedOldVal = RLL %OldVal, 0(%BitShift) 9149 // CompareOpcode %RotatedOldVal, %Src2 9150 // BRC KeepOldMask, UpdateMBB 9151 MBB = LoopMBB; 9152 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) 9153 .addReg(OrigVal).addMBB(StartMBB) 9154 .addReg(Dest).addMBB(UpdateMBB); 9155 BuildMI(MBB, DL, TII->get(SystemZ::RLL), RotatedOldVal) 9156 .addReg(OldVal).addReg(BitShift).addImm(0); 9157 BuildMI(MBB, DL, TII->get(CompareOpcode)) 9158 .addReg(RotatedOldVal).addReg(Src2); 9159 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9160 .addImm(SystemZ::CCMASK_ICMP).addImm(KeepOldMask).addMBB(UpdateMBB); 9161 MBB->addSuccessor(UpdateMBB); 9162 MBB->addSuccessor(UseAltMBB); 9163 9164 // UseAltMBB: 9165 // %RotatedAltVal = RISBG %RotatedOldVal, %Src2, 32, 31 + BitSize, 0 9166 // # fall through to UpdateMBB 9167 MBB = UseAltMBB; 9168 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RotatedAltVal) 9169 .addReg(RotatedOldVal).addReg(Src2) 9170 .addImm(32).addImm(31 + BitSize).addImm(0); 9171 MBB->addSuccessor(UpdateMBB); 9172 9173 // UpdateMBB: 9174 // %RotatedNewVal = PHI [ %RotatedOldVal, LoopMBB ], 9175 // [ %RotatedAltVal, UseAltMBB ] 9176 // %NewVal = RLL %RotatedNewVal, 0(%NegBitShift) 9177 // %Dest = CS %OldVal, %NewVal, Disp(%Base) 9178 // JNE LoopMBB 9179 // # fall through to DoneMBB 9180 MBB = UpdateMBB; 9181 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RotatedNewVal) 9182 .addReg(RotatedOldVal).addMBB(LoopMBB) 9183 .addReg(RotatedAltVal).addMBB(UseAltMBB); 9184 BuildMI(MBB, DL, TII->get(SystemZ::RLL), NewVal) 9185 .addReg(RotatedNewVal).addReg(NegBitShift).addImm(0); 9186 BuildMI(MBB, DL, 
TII->get(CSOpcode), Dest) 9187 .addReg(OldVal) 9188 .addReg(NewVal) 9189 .add(Base) 9190 .addImm(Disp); 9191 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9192 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); 9193 MBB->addSuccessor(LoopMBB); 9194 MBB->addSuccessor(DoneMBB); 9195 9196 MI.eraseFromParent(); 9197 return DoneMBB; 9198 } 9199 9200 // Implement EmitInstrWithCustomInserter for subword pseudo ATOMIC_CMP_SWAPW 9201 // instruction MI. 9202 MachineBasicBlock * 9203 SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr &MI, 9204 MachineBasicBlock *MBB) const { 9205 MachineFunction &MF = *MBB->getParent(); 9206 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 9207 MachineRegisterInfo &MRI = MF.getRegInfo(); 9208 9209 // Extract the operands. Base can be a register or a frame index. 9210 Register Dest = MI.getOperand(0).getReg(); 9211 MachineOperand Base = earlyUseOperand(MI.getOperand(1)); 9212 int64_t Disp = MI.getOperand(2).getImm(); 9213 Register CmpVal = MI.getOperand(3).getReg(); 9214 Register OrigSwapVal = MI.getOperand(4).getReg(); 9215 Register BitShift = MI.getOperand(5).getReg(); 9216 Register NegBitShift = MI.getOperand(6).getReg(); 9217 int64_t BitSize = MI.getOperand(7).getImm(); 9218 DebugLoc DL = MI.getDebugLoc(); 9219 9220 const TargetRegisterClass *RC = &SystemZ::GR32BitRegClass; 9221 9222 // Get the right opcodes for the displacement and zero-extension. 9223 unsigned LOpcode = TII->getOpcodeForOffset(SystemZ::L, Disp); 9224 unsigned CSOpcode = TII->getOpcodeForOffset(SystemZ::CS, Disp); 9225 unsigned ZExtOpcode = BitSize == 8 ? SystemZ::LLCR : SystemZ::LLHR; 9226 assert(LOpcode && CSOpcode && "Displacement out of range"); 9227 9228 // Create virtual registers for temporary results. 9229 Register OrigOldVal = MRI.createVirtualRegister(RC); 9230 Register OldVal = MRI.createVirtualRegister(RC); 9231 Register SwapVal = MRI.createVirtualRegister(RC); 9232 Register StoreVal = MRI.createVirtualRegister(RC); 9233 Register OldValRot = MRI.createVirtualRegister(RC); 9234 Register RetryOldVal = MRI.createVirtualRegister(RC); 9235 Register RetrySwapVal = MRI.createVirtualRegister(RC); 9236 9237 // Insert 2 basic blocks for the loop. 9238 MachineBasicBlock *StartMBB = MBB; 9239 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 9240 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 9241 MachineBasicBlock *SetMBB = SystemZ::emitBlockAfter(LoopMBB); 9242 9243 // StartMBB: 9244 // ... 9245 // %OrigOldVal = L Disp(%Base) 9246 // # fall through to LoopMBB 9247 MBB = StartMBB; 9248 BuildMI(MBB, DL, TII->get(LOpcode), OrigOldVal) 9249 .add(Base) 9250 .addImm(Disp) 9251 .addReg(0); 9252 MBB->addSuccessor(LoopMBB); 9253 9254 // LoopMBB: 9255 // %OldVal = phi [ %OrigOldVal, EntryBB ], [ %RetryOldVal, SetMBB ] 9256 // %SwapVal = phi [ %OrigSwapVal, EntryBB ], [ %RetrySwapVal, SetMBB ] 9257 // %OldValRot = RLL %OldVal, BitSize(%BitShift) 9258 // ^^ The low BitSize bits contain the field 9259 // of interest. 9260 // %RetrySwapVal = RISBG32 %SwapVal, %OldValRot, 32, 63-BitSize, 0 9261 // ^^ Replace the upper 32-BitSize bits of the 9262 // swap value with those that we loaded and rotated. 
9263 // %Dest = LL[CH] %OldValRot 9264 // CR %Dest, %CmpVal 9265 // JNE DoneMBB 9266 // # Fall through to SetMBB 9267 MBB = LoopMBB; 9268 BuildMI(MBB, DL, TII->get(SystemZ::PHI), OldVal) 9269 .addReg(OrigOldVal).addMBB(StartMBB) 9270 .addReg(RetryOldVal).addMBB(SetMBB); 9271 BuildMI(MBB, DL, TII->get(SystemZ::PHI), SwapVal) 9272 .addReg(OrigSwapVal).addMBB(StartMBB) 9273 .addReg(RetrySwapVal).addMBB(SetMBB); 9274 BuildMI(MBB, DL, TII->get(SystemZ::RLL), OldValRot) 9275 .addReg(OldVal).addReg(BitShift).addImm(BitSize); 9276 BuildMI(MBB, DL, TII->get(SystemZ::RISBG32), RetrySwapVal) 9277 .addReg(SwapVal).addReg(OldValRot).addImm(32).addImm(63 - BitSize).addImm(0); 9278 BuildMI(MBB, DL, TII->get(ZExtOpcode), Dest) 9279 .addReg(OldValRot); 9280 BuildMI(MBB, DL, TII->get(SystemZ::CR)) 9281 .addReg(Dest).addReg(CmpVal); 9282 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9283 .addImm(SystemZ::CCMASK_ICMP) 9284 .addImm(SystemZ::CCMASK_CMP_NE).addMBB(DoneMBB); 9285 MBB->addSuccessor(DoneMBB); 9286 MBB->addSuccessor(SetMBB); 9287 9288 // SetMBB: 9289 // %StoreVal = RLL %RetrySwapVal, -BitSize(%NegBitShift) 9290 // ^^ Rotate the new field to its proper position. 9291 // %RetryOldVal = CS %OldVal, %StoreVal, Disp(%Base) 9292 // JNE LoopMBB 9293 // # fall through to ExitMBB 9294 MBB = SetMBB; 9295 BuildMI(MBB, DL, TII->get(SystemZ::RLL), StoreVal) 9296 .addReg(RetrySwapVal).addReg(NegBitShift).addImm(-BitSize); 9297 BuildMI(MBB, DL, TII->get(CSOpcode), RetryOldVal) 9298 .addReg(OldVal) 9299 .addReg(StoreVal) 9300 .add(Base) 9301 .addImm(Disp); 9302 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9303 .addImm(SystemZ::CCMASK_CS).addImm(SystemZ::CCMASK_CS_NE).addMBB(LoopMBB); 9304 MBB->addSuccessor(LoopMBB); 9305 MBB->addSuccessor(DoneMBB); 9306 9307 // If the CC def wasn't dead in the ATOMIC_CMP_SWAPW, mark CC as live-in 9308 // to the block after the loop. At this point, CC may have been defined 9309 // either by the CR in LoopMBB or by the CS in SetMBB. 9310 if (!MI.registerDefIsDead(SystemZ::CC, /*TRI=*/nullptr)) 9311 DoneMBB->addLiveIn(SystemZ::CC); 9312 9313 MI.eraseFromParent(); 9314 return DoneMBB; 9315 } 9316 9317 // Emit a move from two GR64s to a GR128. 9318 MachineBasicBlock * 9319 SystemZTargetLowering::emitPair128(MachineInstr &MI, 9320 MachineBasicBlock *MBB) const { 9321 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 9322 const DebugLoc &DL = MI.getDebugLoc(); 9323 9324 Register Dest = MI.getOperand(0).getReg(); 9325 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), Dest) 9326 .add(MI.getOperand(1)) 9327 .addImm(SystemZ::subreg_h64) 9328 .add(MI.getOperand(2)) 9329 .addImm(SystemZ::subreg_l64); 9330 MI.eraseFromParent(); 9331 return MBB; 9332 } 9333 9334 // Emit an extension from a GR64 to a GR128. ClearEven is true 9335 // if the high register of the GR128 value must be cleared or false if 9336 // it's "don't care". 
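// For illustration only (a sketch of the emitted MIR, not verbatim compiler
// output): with ClearEven set (the ZEXT128 case), extending %src roughly
// becomes
//   %in128  = IMPLICIT_DEF
//   %zero64 = LLILL 0
//   %tmp128 = INSERT_SUBREG %in128, %zero64, subreg_h64
//   %dest   = INSERT_SUBREG %tmp128, %src, subreg_l64
// whereas without ClearEven, %dest is built directly from the IMPLICIT_DEF
// and %src, leaving the high half undefined.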
9337 MachineBasicBlock *SystemZTargetLowering::emitExt128(MachineInstr &MI, 9338 MachineBasicBlock *MBB, 9339 bool ClearEven) const { 9340 MachineFunction &MF = *MBB->getParent(); 9341 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 9342 MachineRegisterInfo &MRI = MF.getRegInfo(); 9343 DebugLoc DL = MI.getDebugLoc(); 9344 9345 Register Dest = MI.getOperand(0).getReg(); 9346 Register Src = MI.getOperand(1).getReg(); 9347 Register In128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 9348 9349 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::IMPLICIT_DEF), In128); 9350 if (ClearEven) { 9351 Register NewIn128 = MRI.createVirtualRegister(&SystemZ::GR128BitRegClass); 9352 Register Zero64 = MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 9353 9354 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LLILL), Zero64) 9355 .addImm(0); 9356 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), NewIn128) 9357 .addReg(In128).addReg(Zero64).addImm(SystemZ::subreg_h64); 9358 In128 = NewIn128; 9359 } 9360 BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dest) 9361 .addReg(In128).addReg(Src).addImm(SystemZ::subreg_l64); 9362 9363 MI.eraseFromParent(); 9364 return MBB; 9365 } 9366 9367 MachineBasicBlock * 9368 SystemZTargetLowering::emitMemMemWrapper(MachineInstr &MI, 9369 MachineBasicBlock *MBB, 9370 unsigned Opcode, bool IsMemset) const { 9371 MachineFunction &MF = *MBB->getParent(); 9372 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 9373 MachineRegisterInfo &MRI = MF.getRegInfo(); 9374 DebugLoc DL = MI.getDebugLoc(); 9375 9376 MachineOperand DestBase = earlyUseOperand(MI.getOperand(0)); 9377 uint64_t DestDisp = MI.getOperand(1).getImm(); 9378 MachineOperand SrcBase = MachineOperand::CreateReg(0U, false); 9379 uint64_t SrcDisp; 9380 9381 // Fold the displacement Disp if it is out of range. 9382 auto foldDisplIfNeeded = [&](MachineOperand &Base, uint64_t &Disp) -> void { 9383 if (!isUInt<12>(Disp)) { 9384 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 9385 unsigned Opcode = TII->getOpcodeForOffset(SystemZ::LA, Disp); 9386 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opcode), Reg) 9387 .add(Base).addImm(Disp).addReg(0); 9388 Base = MachineOperand::CreateReg(Reg, false); 9389 Disp = 0; 9390 } 9391 }; 9392 9393 if (!IsMemset) { 9394 SrcBase = earlyUseOperand(MI.getOperand(2)); 9395 SrcDisp = MI.getOperand(3).getImm(); 9396 } else { 9397 SrcBase = DestBase; 9398 SrcDisp = DestDisp++; 9399 foldDisplIfNeeded(DestBase, DestDisp); 9400 } 9401 9402 MachineOperand &LengthMO = MI.getOperand(IsMemset ? 2 : 4); 9403 bool IsImmForm = LengthMO.isImm(); 9404 bool IsRegForm = !IsImmForm; 9405 9406 // Build and insert one Opcode of Length, with special treatment for memset. 
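  // For memset the fill byte is materialized once and then propagated with an
  // overlapping MVC that reads one byte behind where it writes. A sketch of
  // the idea (illustrative assembly, assuming an N-byte memset of an
  // immediate byte B at 0(%dst), with N <= 256):
  //   MVI   0(%dst), B            # store the first byte
  //   MVC   1(N-1,%dst), 0(%dst)  # left-to-right copy replicates B
  // This is why, for memset, SrcBase above aliases DestBase with the
  // destination displacement bumped by one.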
9407 auto insertMemMemOp = [&](MachineBasicBlock *InsMBB, 9408 MachineBasicBlock::iterator InsPos, 9409 MachineOperand DBase, uint64_t DDisp, 9410 MachineOperand SBase, uint64_t SDisp, 9411 unsigned Length) -> void { 9412 assert(Length > 0 && Length <= 256 && "Building memory op with bad length."); 9413 if (IsMemset) { 9414 MachineOperand ByteMO = earlyUseOperand(MI.getOperand(3)); 9415 if (ByteMO.isImm()) 9416 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::MVI)) 9417 .add(SBase).addImm(SDisp).add(ByteMO); 9418 else 9419 BuildMI(*InsMBB, InsPos, DL, TII->get(SystemZ::STC)) 9420 .add(ByteMO).add(SBase).addImm(SDisp).addReg(0); 9421 if (--Length == 0) 9422 return; 9423 } 9424 BuildMI(*MBB, InsPos, DL, TII->get(Opcode)) 9425 .add(DBase).addImm(DDisp).addImm(Length) 9426 .add(SBase).addImm(SDisp) 9427 .setMemRefs(MI.memoperands()); 9428 }; 9429 9430 bool NeedsLoop = false; 9431 uint64_t ImmLength = 0; 9432 Register LenAdjReg = SystemZ::NoRegister; 9433 if (IsImmForm) { 9434 ImmLength = LengthMO.getImm(); 9435 ImmLength += IsMemset ? 2 : 1; // Add back the subtracted adjustment. 9436 if (ImmLength == 0) { 9437 MI.eraseFromParent(); 9438 return MBB; 9439 } 9440 if (Opcode == SystemZ::CLC) { 9441 if (ImmLength > 3 * 256) 9442 // A two-CLC sequence is a clear win over a loop, not least because 9443 // it needs only one branch. A three-CLC sequence needs the same 9444 // number of branches as a loop (i.e. 2), but is shorter. That 9445 // brings us to lengths greater than 768 bytes. It seems relatively 9446 // likely that a difference will be found within the first 768 bytes, 9447 // so we just optimize for the smallest number of branch 9448 // instructions, in order to avoid polluting the prediction buffer 9449 // too much. 9450 NeedsLoop = true; 9451 } else if (ImmLength > 6 * 256) 9452 // The heuristic we use is to prefer loops for anything that would 9453 // require 7 or more MVCs. With these kinds of sizes there isn't much 9454 // to choose between straight-line code and looping code, since the 9455 // time will be dominated by the MVCs themselves. 9456 NeedsLoop = true; 9457 } else { 9458 NeedsLoop = true; 9459 LenAdjReg = LengthMO.getReg(); 9460 } 9461 9462 // When generating more than one CLC, all but the last will need to 9463 // branch to the end when a difference is found. 9464 MachineBasicBlock *EndMBB = 9465 (Opcode == SystemZ::CLC && (ImmLength > 256 || NeedsLoop) 9466 ? SystemZ::splitBlockAfter(MI, MBB) 9467 : nullptr); 9468 9469 if (NeedsLoop) { 9470 Register StartCountReg = 9471 MRI.createVirtualRegister(&SystemZ::GR64BitRegClass); 9472 if (IsImmForm) { 9473 TII->loadImmediate(*MBB, MI, StartCountReg, ImmLength / 256); 9474 ImmLength &= 255; 9475 } else { 9476 BuildMI(*MBB, MI, DL, TII->get(SystemZ::SRLG), StartCountReg) 9477 .addReg(LenAdjReg) 9478 .addReg(0) 9479 .addImm(8); 9480 } 9481 9482 bool HaveSingleBase = DestBase.isIdenticalTo(SrcBase); 9483 auto loadZeroAddress = [&]() -> MachineOperand { 9484 Register Reg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 9485 BuildMI(*MBB, MI, DL, TII->get(SystemZ::LGHI), Reg).addImm(0); 9486 return MachineOperand::CreateReg(Reg, false); 9487 }; 9488 if (DestBase.isReg() && DestBase.getReg() == SystemZ::NoRegister) 9489 DestBase = loadZeroAddress(); 9490 if (SrcBase.isReg() && SrcBase.getReg() == SystemZ::NoRegister) 9491 SrcBase = HaveSingleBase ? 
DestBase : loadZeroAddress(); 9492 9493 MachineBasicBlock *StartMBB = nullptr; 9494 MachineBasicBlock *LoopMBB = nullptr; 9495 MachineBasicBlock *NextMBB = nullptr; 9496 MachineBasicBlock *DoneMBB = nullptr; 9497 MachineBasicBlock *AllDoneMBB = nullptr; 9498 9499 Register StartSrcReg = forceReg(MI, SrcBase, TII); 9500 Register StartDestReg = 9501 (HaveSingleBase ? StartSrcReg : forceReg(MI, DestBase, TII)); 9502 9503 const TargetRegisterClass *RC = &SystemZ::ADDR64BitRegClass; 9504 Register ThisSrcReg = MRI.createVirtualRegister(RC); 9505 Register ThisDestReg = 9506 (HaveSingleBase ? ThisSrcReg : MRI.createVirtualRegister(RC)); 9507 Register NextSrcReg = MRI.createVirtualRegister(RC); 9508 Register NextDestReg = 9509 (HaveSingleBase ? NextSrcReg : MRI.createVirtualRegister(RC)); 9510 RC = &SystemZ::GR64BitRegClass; 9511 Register ThisCountReg = MRI.createVirtualRegister(RC); 9512 Register NextCountReg = MRI.createVirtualRegister(RC); 9513 9514 if (IsRegForm) { 9515 AllDoneMBB = SystemZ::splitBlockBefore(MI, MBB); 9516 StartMBB = SystemZ::emitBlockAfter(MBB); 9517 LoopMBB = SystemZ::emitBlockAfter(StartMBB); 9518 NextMBB = (EndMBB ? SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); 9519 DoneMBB = SystemZ::emitBlockAfter(NextMBB); 9520 9521 // MBB: 9522 // # Jump to AllDoneMBB if LenAdjReg means 0, or fall thru to StartMBB. 9523 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 9524 .addReg(LenAdjReg).addImm(IsMemset ? -2 : -1); 9525 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9526 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) 9527 .addMBB(AllDoneMBB); 9528 MBB->addSuccessor(AllDoneMBB); 9529 if (!IsMemset) 9530 MBB->addSuccessor(StartMBB); 9531 else { 9532 // MemsetOneCheckMBB: 9533 // # Jump to MemsetOneMBB for a memset of length 1, or 9534 // # fall thru to StartMBB. 9535 MachineBasicBlock *MemsetOneCheckMBB = SystemZ::emitBlockAfter(MBB); 9536 MachineBasicBlock *MemsetOneMBB = SystemZ::emitBlockAfter(&*MF.rbegin()); 9537 MBB->addSuccessor(MemsetOneCheckMBB); 9538 MBB = MemsetOneCheckMBB; 9539 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 9540 .addReg(LenAdjReg).addImm(-1); 9541 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9542 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) 9543 .addMBB(MemsetOneMBB); 9544 MBB->addSuccessor(MemsetOneMBB, {10, 100}); 9545 MBB->addSuccessor(StartMBB, {90, 100}); 9546 9547 // MemsetOneMBB: 9548 // # Jump back to AllDoneMBB after a single MVI or STC. 9549 MBB = MemsetOneMBB; 9550 insertMemMemOp(MBB, MBB->end(), 9551 MachineOperand::CreateReg(StartDestReg, false), DestDisp, 9552 MachineOperand::CreateReg(StartSrcReg, false), SrcDisp, 9553 1); 9554 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(AllDoneMBB); 9555 MBB->addSuccessor(AllDoneMBB); 9556 } 9557 9558 // StartMBB: 9559 // # Jump to DoneMBB if %StartCountReg is zero, or fall through to LoopMBB. 9560 MBB = StartMBB; 9561 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 9562 .addReg(StartCountReg).addImm(0); 9563 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9564 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) 9565 .addMBB(DoneMBB); 9566 MBB->addSuccessor(DoneMBB); 9567 MBB->addSuccessor(LoopMBB); 9568 } 9569 else { 9570 StartMBB = MBB; 9571 DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 9572 LoopMBB = SystemZ::emitBlockAfter(StartMBB); 9573 NextMBB = (EndMBB ? 
SystemZ::emitBlockAfter(LoopMBB) : LoopMBB); 9574 9575 // StartMBB: 9576 // # fall through to LoopMBB 9577 MBB->addSuccessor(LoopMBB); 9578 9579 DestBase = MachineOperand::CreateReg(NextDestReg, false); 9580 SrcBase = MachineOperand::CreateReg(NextSrcReg, false); 9581 if (EndMBB && !ImmLength) 9582 // If the loop handled the whole CLC range, DoneMBB will be empty with 9583 // CC live-through into EndMBB, so add it as live-in. 9584 DoneMBB->addLiveIn(SystemZ::CC); 9585 } 9586 9587 // LoopMBB: 9588 // %ThisDestReg = phi [ %StartDestReg, StartMBB ], 9589 // [ %NextDestReg, NextMBB ] 9590 // %ThisSrcReg = phi [ %StartSrcReg, StartMBB ], 9591 // [ %NextSrcReg, NextMBB ] 9592 // %ThisCountReg = phi [ %StartCountReg, StartMBB ], 9593 // [ %NextCountReg, NextMBB ] 9594 // ( PFD 2, 768+DestDisp(%ThisDestReg) ) 9595 // Opcode DestDisp(256,%ThisDestReg), SrcDisp(%ThisSrcReg) 9596 // ( JLH EndMBB ) 9597 // 9598 // The prefetch is used only for MVC. The JLH is used only for CLC. 9599 MBB = LoopMBB; 9600 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisDestReg) 9601 .addReg(StartDestReg).addMBB(StartMBB) 9602 .addReg(NextDestReg).addMBB(NextMBB); 9603 if (!HaveSingleBase) 9604 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisSrcReg) 9605 .addReg(StartSrcReg).addMBB(StartMBB) 9606 .addReg(NextSrcReg).addMBB(NextMBB); 9607 BuildMI(MBB, DL, TII->get(SystemZ::PHI), ThisCountReg) 9608 .addReg(StartCountReg).addMBB(StartMBB) 9609 .addReg(NextCountReg).addMBB(NextMBB); 9610 if (Opcode == SystemZ::MVC) 9611 BuildMI(MBB, DL, TII->get(SystemZ::PFD)) 9612 .addImm(SystemZ::PFD_WRITE) 9613 .addReg(ThisDestReg).addImm(DestDisp - IsMemset + 768).addReg(0); 9614 insertMemMemOp(MBB, MBB->end(), 9615 MachineOperand::CreateReg(ThisDestReg, false), DestDisp, 9616 MachineOperand::CreateReg(ThisSrcReg, false), SrcDisp, 256); 9617 if (EndMBB) { 9618 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9619 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) 9620 .addMBB(EndMBB); 9621 MBB->addSuccessor(EndMBB); 9622 MBB->addSuccessor(NextMBB); 9623 } 9624 9625 // NextMBB: 9626 // %NextDestReg = LA 256(%ThisDestReg) 9627 // %NextSrcReg = LA 256(%ThisSrcReg) 9628 // %NextCountReg = AGHI %ThisCountReg, -1 9629 // CGHI %NextCountReg, 0 9630 // JLH LoopMBB 9631 // # fall through to DoneMBB 9632 // 9633 // The AGHI, CGHI and JLH should be converted to BRCTG by later passes. 9634 MBB = NextMBB; 9635 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextDestReg) 9636 .addReg(ThisDestReg).addImm(256).addReg(0); 9637 if (!HaveSingleBase) 9638 BuildMI(MBB, DL, TII->get(SystemZ::LA), NextSrcReg) 9639 .addReg(ThisSrcReg).addImm(256).addReg(0); 9640 BuildMI(MBB, DL, TII->get(SystemZ::AGHI), NextCountReg) 9641 .addReg(ThisCountReg).addImm(-1); 9642 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 9643 .addReg(NextCountReg).addImm(0); 9644 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9645 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) 9646 .addMBB(LoopMBB); 9647 MBB->addSuccessor(LoopMBB); 9648 MBB->addSuccessor(DoneMBB); 9649 9650 MBB = DoneMBB; 9651 if (IsRegForm) { 9652 // DoneMBB: 9653 // # Make PHIs for RemDestReg/RemSrcReg as the loop may or may not run. 9654 // # Use EXecute Relative Long for the remainder of the bytes. The target 9655 // instruction of the EXRL will have a length field of 1 since 0 is an 9656 // illegal value. The number of bytes processed becomes (%LenAdjReg & 9657 // 0xff) + 1. 9658 // # Fall through to AllDoneMBB. 
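      // Worked example (a sketch, assuming a register-length MVC of 300
      // bytes, so %LenAdjReg holds 299): %StartCountReg = 299 >> 8 = 1, so
      // the loop above copies one 256-byte chunk, and the EXRL below
      // processes the remaining (299 & 0xff) + 1 = 44 bytes, for a total of
      // 256 + 44 = 300 bytes.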
9659 Register RemSrcReg = MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 9660 Register RemDestReg = HaveSingleBase ? RemSrcReg 9661 : MRI.createVirtualRegister(&SystemZ::ADDR64BitRegClass); 9662 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemDestReg) 9663 .addReg(StartDestReg).addMBB(StartMBB) 9664 .addReg(NextDestReg).addMBB(NextMBB); 9665 if (!HaveSingleBase) 9666 BuildMI(MBB, DL, TII->get(SystemZ::PHI), RemSrcReg) 9667 .addReg(StartSrcReg).addMBB(StartMBB) 9668 .addReg(NextSrcReg).addMBB(NextMBB); 9669 if (IsMemset) 9670 insertMemMemOp(MBB, MBB->end(), 9671 MachineOperand::CreateReg(RemDestReg, false), DestDisp, 9672 MachineOperand::CreateReg(RemSrcReg, false), SrcDisp, 1); 9673 MachineInstrBuilder EXRL_MIB = 9674 BuildMI(MBB, DL, TII->get(SystemZ::EXRL_Pseudo)) 9675 .addImm(Opcode) 9676 .addReg(LenAdjReg) 9677 .addReg(RemDestReg).addImm(DestDisp) 9678 .addReg(RemSrcReg).addImm(SrcDisp); 9679 MBB->addSuccessor(AllDoneMBB); 9680 MBB = AllDoneMBB; 9681 if (Opcode != SystemZ::MVC) { 9682 EXRL_MIB.addReg(SystemZ::CC, RegState::ImplicitDefine); 9683 if (EndMBB) 9684 MBB->addLiveIn(SystemZ::CC); 9685 } 9686 } 9687 MF.getProperties().reset(MachineFunctionProperties::Property::NoPHIs); 9688 } 9689 9690 // Handle any remaining bytes with straight-line code. 9691 while (ImmLength > 0) { 9692 uint64_t ThisLength = std::min(ImmLength, uint64_t(256)); 9693 // The previous iteration might have created out-of-range displacements. 9694 // Apply them using LA/LAY if so. 9695 foldDisplIfNeeded(DestBase, DestDisp); 9696 foldDisplIfNeeded(SrcBase, SrcDisp); 9697 insertMemMemOp(MBB, MI, DestBase, DestDisp, SrcBase, SrcDisp, ThisLength); 9698 DestDisp += ThisLength; 9699 SrcDisp += ThisLength; 9700 ImmLength -= ThisLength; 9701 // If there's another CLC to go, branch to the end if a difference 9702 // was found. 9703 if (EndMBB && ImmLength > 0) { 9704 MachineBasicBlock *NextMBB = SystemZ::splitBlockBefore(MI, MBB); 9705 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9706 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_NE) 9707 .addMBB(EndMBB); 9708 MBB->addSuccessor(EndMBB); 9709 MBB->addSuccessor(NextMBB); 9710 MBB = NextMBB; 9711 } 9712 } 9713 if (EndMBB) { 9714 MBB->addSuccessor(EndMBB); 9715 MBB = EndMBB; 9716 MBB->addLiveIn(SystemZ::CC); 9717 } 9718 9719 MI.eraseFromParent(); 9720 return MBB; 9721 } 9722 9723 // Decompose string pseudo-instruction MI into a loop that continually performs 9724 // Opcode until CC != 3. 
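// These string instructions are interruptible: CC 3 means the hardware
// stopped after a CPU-determined number of bytes without producing a result,
// and the instruction must simply be re-issued with the updated registers.
// As an illustrative sketch, a CLSTLoop pseudo therefore expands to roughly
//   .LoopMBB:
//     CLST  %op1, %op2        # R0L holds the string-terminator character
//     JO    .LoopMBB          # CC 3: not finished, retry
// with the PHIs below feeding the advanced operand registers back into the
// next iteration.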
9725 MachineBasicBlock *SystemZTargetLowering::emitStringWrapper( 9726 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { 9727 MachineFunction &MF = *MBB->getParent(); 9728 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 9729 MachineRegisterInfo &MRI = MF.getRegInfo(); 9730 DebugLoc DL = MI.getDebugLoc(); 9731 9732 uint64_t End1Reg = MI.getOperand(0).getReg(); 9733 uint64_t Start1Reg = MI.getOperand(1).getReg(); 9734 uint64_t Start2Reg = MI.getOperand(2).getReg(); 9735 uint64_t CharReg = MI.getOperand(3).getReg(); 9736 9737 const TargetRegisterClass *RC = &SystemZ::GR64BitRegClass; 9738 uint64_t This1Reg = MRI.createVirtualRegister(RC); 9739 uint64_t This2Reg = MRI.createVirtualRegister(RC); 9740 uint64_t End2Reg = MRI.createVirtualRegister(RC); 9741 9742 MachineBasicBlock *StartMBB = MBB; 9743 MachineBasicBlock *DoneMBB = SystemZ::splitBlockBefore(MI, MBB); 9744 MachineBasicBlock *LoopMBB = SystemZ::emitBlockAfter(StartMBB); 9745 9746 // StartMBB: 9747 // # fall through to LoopMBB 9748 MBB->addSuccessor(LoopMBB); 9749 9750 // LoopMBB: 9751 // %This1Reg = phi [ %Start1Reg, StartMBB ], [ %End1Reg, LoopMBB ] 9752 // %This2Reg = phi [ %Start2Reg, StartMBB ], [ %End2Reg, LoopMBB ] 9753 // R0L = %CharReg 9754 // %End1Reg, %End2Reg = CLST %This1Reg, %This2Reg -- uses R0L 9755 // JO LoopMBB 9756 // # fall through to DoneMBB 9757 // 9758 // The load of R0L can be hoisted by post-RA LICM. 9759 MBB = LoopMBB; 9760 9761 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This1Reg) 9762 .addReg(Start1Reg).addMBB(StartMBB) 9763 .addReg(End1Reg).addMBB(LoopMBB); 9764 BuildMI(MBB, DL, TII->get(SystemZ::PHI), This2Reg) 9765 .addReg(Start2Reg).addMBB(StartMBB) 9766 .addReg(End2Reg).addMBB(LoopMBB); 9767 BuildMI(MBB, DL, TII->get(TargetOpcode::COPY), SystemZ::R0L).addReg(CharReg); 9768 BuildMI(MBB, DL, TII->get(Opcode)) 9769 .addReg(End1Reg, RegState::Define).addReg(End2Reg, RegState::Define) 9770 .addReg(This1Reg).addReg(This2Reg); 9771 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9772 .addImm(SystemZ::CCMASK_ANY).addImm(SystemZ::CCMASK_3).addMBB(LoopMBB); 9773 MBB->addSuccessor(LoopMBB); 9774 MBB->addSuccessor(DoneMBB); 9775 9776 DoneMBB->addLiveIn(SystemZ::CC); 9777 9778 MI.eraseFromParent(); 9779 return DoneMBB; 9780 } 9781 9782 // Update TBEGIN instruction with final opcode and register clobbers. 9783 MachineBasicBlock *SystemZTargetLowering::emitTransactionBegin( 9784 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode, 9785 bool NoFloat) const { 9786 MachineFunction &MF = *MBB->getParent(); 9787 const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); 9788 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 9789 9790 // Update opcode. 9791 MI.setDesc(TII->get(Opcode)); 9792 9793 // We cannot handle a TBEGIN that clobbers the stack or frame pointer. 9794 // Make sure to add the corresponding GRSM bits if they are missing. 9795 uint64_t Control = MI.getOperand(2).getImm(); 9796 static const unsigned GPRControlBit[16] = { 9797 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, 9798 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 9799 }; 9800 Control |= GPRControlBit[15]; 9801 if (TFI->hasFP(MF)) 9802 Control |= GPRControlBit[11]; 9803 MI.getOperand(2).setImm(Control); 9804 9805 // Add GPR clobbers. 9806 for (int I = 0; I < 16; I++) { 9807 if ((Control & GPRControlBit[I]) == 0) { 9808 unsigned Reg = SystemZMC::GR64Regs[I]; 9809 MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); 9810 } 9811 } 9812 9813 // Add FPR/VR clobbers. 
9814 if (!NoFloat && (Control & 4) != 0) { 9815 if (Subtarget.hasVector()) { 9816 for (unsigned Reg : SystemZMC::VR128Regs) { 9817 MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); 9818 } 9819 } else { 9820 for (unsigned Reg : SystemZMC::FP64Regs) { 9821 MI.addOperand(MachineOperand::CreateReg(Reg, true, true)); 9822 } 9823 } 9824 } 9825 9826 return MBB; 9827 } 9828 9829 MachineBasicBlock *SystemZTargetLowering::emitLoadAndTestCmp0( 9830 MachineInstr &MI, MachineBasicBlock *MBB, unsigned Opcode) const { 9831 MachineFunction &MF = *MBB->getParent(); 9832 MachineRegisterInfo *MRI = &MF.getRegInfo(); 9833 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 9834 DebugLoc DL = MI.getDebugLoc(); 9835 9836 Register SrcReg = MI.getOperand(0).getReg(); 9837 9838 // Create new virtual register of the same class as source. 9839 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 9840 Register DstReg = MRI->createVirtualRegister(RC); 9841 9842 // Replace pseudo with a normal load-and-test that models the def as 9843 // well. 9844 BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg) 9845 .addReg(SrcReg) 9846 .setMIFlags(MI.getFlags()); 9847 MI.eraseFromParent(); 9848 9849 return MBB; 9850 } 9851 9852 MachineBasicBlock *SystemZTargetLowering::emitProbedAlloca( 9853 MachineInstr &MI, MachineBasicBlock *MBB) const { 9854 MachineFunction &MF = *MBB->getParent(); 9855 MachineRegisterInfo *MRI = &MF.getRegInfo(); 9856 const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); 9857 DebugLoc DL = MI.getDebugLoc(); 9858 const unsigned ProbeSize = getStackProbeSize(MF); 9859 Register DstReg = MI.getOperand(0).getReg(); 9860 Register SizeReg = MI.getOperand(2).getReg(); 9861 9862 MachineBasicBlock *StartMBB = MBB; 9863 MachineBasicBlock *DoneMBB = SystemZ::splitBlockAfter(MI, MBB); 9864 MachineBasicBlock *LoopTestMBB = SystemZ::emitBlockAfter(StartMBB); 9865 MachineBasicBlock *LoopBodyMBB = SystemZ::emitBlockAfter(LoopTestMBB); 9866 MachineBasicBlock *TailTestMBB = SystemZ::emitBlockAfter(LoopBodyMBB); 9867 MachineBasicBlock *TailMBB = SystemZ::emitBlockAfter(TailTestMBB); 9868 9869 MachineMemOperand *VolLdMMO = MF.getMachineMemOperand(MachinePointerInfo(), 9870 MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad, 8, Align(1)); 9871 9872 Register PHIReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); 9873 Register IncReg = MRI->createVirtualRegister(&SystemZ::ADDR64BitRegClass); 9874 9875 // LoopTestMBB 9876 // BRC TailTestMBB 9877 // # fallthrough to LoopBodyMBB 9878 StartMBB->addSuccessor(LoopTestMBB); 9879 MBB = LoopTestMBB; 9880 BuildMI(MBB, DL, TII->get(SystemZ::PHI), PHIReg) 9881 .addReg(SizeReg) 9882 .addMBB(StartMBB) 9883 .addReg(IncReg) 9884 .addMBB(LoopBodyMBB); 9885 BuildMI(MBB, DL, TII->get(SystemZ::CLGFI)) 9886 .addReg(PHIReg) 9887 .addImm(ProbeSize); 9888 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9889 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_LT) 9890 .addMBB(TailTestMBB); 9891 MBB->addSuccessor(LoopBodyMBB); 9892 MBB->addSuccessor(TailTestMBB); 9893 9894 // LoopBodyMBB: Allocate and probe by means of a volatile compare. 
9895 // J LoopTestMBB 9896 MBB = LoopBodyMBB; 9897 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), IncReg) 9898 .addReg(PHIReg) 9899 .addImm(ProbeSize); 9900 BuildMI(MBB, DL, TII->get(SystemZ::SLGFI), SystemZ::R15D) 9901 .addReg(SystemZ::R15D) 9902 .addImm(ProbeSize); 9903 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) 9904 .addReg(SystemZ::R15D).addImm(ProbeSize - 8).addReg(0) 9905 .setMemRefs(VolLdMMO); 9906 BuildMI(MBB, DL, TII->get(SystemZ::J)).addMBB(LoopTestMBB); 9907 MBB->addSuccessor(LoopTestMBB); 9908 9909 // TailTestMBB 9910 // BRC DoneMBB 9911 // # fallthrough to TailMBB 9912 MBB = TailTestMBB; 9913 BuildMI(MBB, DL, TII->get(SystemZ::CGHI)) 9914 .addReg(PHIReg) 9915 .addImm(0); 9916 BuildMI(MBB, DL, TII->get(SystemZ::BRC)) 9917 .addImm(SystemZ::CCMASK_ICMP).addImm(SystemZ::CCMASK_CMP_EQ) 9918 .addMBB(DoneMBB); 9919 MBB->addSuccessor(TailMBB); 9920 MBB->addSuccessor(DoneMBB); 9921 9922 // TailMBB 9923 // # fallthrough to DoneMBB 9924 MBB = TailMBB; 9925 BuildMI(MBB, DL, TII->get(SystemZ::SLGR), SystemZ::R15D) 9926 .addReg(SystemZ::R15D) 9927 .addReg(PHIReg); 9928 BuildMI(MBB, DL, TII->get(SystemZ::CG)).addReg(SystemZ::R15D) 9929 .addReg(SystemZ::R15D).addImm(-8).addReg(PHIReg) 9930 .setMemRefs(VolLdMMO); 9931 MBB->addSuccessor(DoneMBB); 9932 9933 // DoneMBB 9934 MBB = DoneMBB; 9935 BuildMI(*MBB, MBB->begin(), DL, TII->get(TargetOpcode::COPY), DstReg) 9936 .addReg(SystemZ::R15D); 9937 9938 MI.eraseFromParent(); 9939 return DoneMBB; 9940 } 9941 9942 SDValue SystemZTargetLowering:: 9943 getBackchainAddress(SDValue SP, SelectionDAG &DAG) const { 9944 MachineFunction &MF = DAG.getMachineFunction(); 9945 auto *TFL = Subtarget.getFrameLowering<SystemZELFFrameLowering>(); 9946 SDLoc DL(SP); 9947 return DAG.getNode(ISD::ADD, DL, MVT::i64, SP, 9948 DAG.getIntPtrConstant(TFL->getBackchainOffset(MF), DL)); 9949 } 9950 9951 MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter( 9952 MachineInstr &MI, MachineBasicBlock *MBB) const { 9953 switch (MI.getOpcode()) { 9954 case SystemZ::ADJCALLSTACKDOWN: 9955 case SystemZ::ADJCALLSTACKUP: 9956 return emitAdjCallStack(MI, MBB); 9957 9958 case SystemZ::Select32: 9959 case SystemZ::Select64: 9960 case SystemZ::Select128: 9961 case SystemZ::SelectF32: 9962 case SystemZ::SelectF64: 9963 case SystemZ::SelectF128: 9964 case SystemZ::SelectVR32: 9965 case SystemZ::SelectVR64: 9966 case SystemZ::SelectVR128: 9967 return emitSelect(MI, MBB); 9968 9969 case SystemZ::CondStore8Mux: 9970 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false); 9971 case SystemZ::CondStore8MuxInv: 9972 return emitCondStore(MI, MBB, SystemZ::STCMux, 0, true); 9973 case SystemZ::CondStore16Mux: 9974 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false); 9975 case SystemZ::CondStore16MuxInv: 9976 return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true); 9977 case SystemZ::CondStore32Mux: 9978 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false); 9979 case SystemZ::CondStore32MuxInv: 9980 return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true); 9981 case SystemZ::CondStore8: 9982 return emitCondStore(MI, MBB, SystemZ::STC, 0, false); 9983 case SystemZ::CondStore8Inv: 9984 return emitCondStore(MI, MBB, SystemZ::STC, 0, true); 9985 case SystemZ::CondStore16: 9986 return emitCondStore(MI, MBB, SystemZ::STH, 0, false); 9987 case SystemZ::CondStore16Inv: 9988 return emitCondStore(MI, MBB, SystemZ::STH, 0, true); 9989 case SystemZ::CondStore32: 9990 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, false); 9991 case 
SystemZ::CondStore32Inv: 9992 return emitCondStore(MI, MBB, SystemZ::ST, SystemZ::STOC, true); 9993 case SystemZ::CondStore64: 9994 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, false); 9995 case SystemZ::CondStore64Inv: 9996 return emitCondStore(MI, MBB, SystemZ::STG, SystemZ::STOCG, true); 9997 case SystemZ::CondStoreF32: 9998 return emitCondStore(MI, MBB, SystemZ::STE, 0, false); 9999 case SystemZ::CondStoreF32Inv: 10000 return emitCondStore(MI, MBB, SystemZ::STE, 0, true); 10001 case SystemZ::CondStoreF64: 10002 return emitCondStore(MI, MBB, SystemZ::STD, 0, false); 10003 case SystemZ::CondStoreF64Inv: 10004 return emitCondStore(MI, MBB, SystemZ::STD, 0, true); 10005 10006 case SystemZ::SCmp128Hi: 10007 return emitICmp128Hi(MI, MBB, false); 10008 case SystemZ::UCmp128Hi: 10009 return emitICmp128Hi(MI, MBB, true); 10010 10011 case SystemZ::PAIR128: 10012 return emitPair128(MI, MBB); 10013 case SystemZ::AEXT128: 10014 return emitExt128(MI, MBB, false); 10015 case SystemZ::ZEXT128: 10016 return emitExt128(MI, MBB, true); 10017 10018 case SystemZ::ATOMIC_SWAPW: 10019 return emitAtomicLoadBinary(MI, MBB, 0); 10020 10021 case SystemZ::ATOMIC_LOADW_AR: 10022 return emitAtomicLoadBinary(MI, MBB, SystemZ::AR); 10023 case SystemZ::ATOMIC_LOADW_AFI: 10024 return emitAtomicLoadBinary(MI, MBB, SystemZ::AFI); 10025 10026 case SystemZ::ATOMIC_LOADW_SR: 10027 return emitAtomicLoadBinary(MI, MBB, SystemZ::SR); 10028 10029 case SystemZ::ATOMIC_LOADW_NR: 10030 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR); 10031 case SystemZ::ATOMIC_LOADW_NILH: 10032 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH); 10033 10034 case SystemZ::ATOMIC_LOADW_OR: 10035 return emitAtomicLoadBinary(MI, MBB, SystemZ::OR); 10036 case SystemZ::ATOMIC_LOADW_OILH: 10037 return emitAtomicLoadBinary(MI, MBB, SystemZ::OILH); 10038 10039 case SystemZ::ATOMIC_LOADW_XR: 10040 return emitAtomicLoadBinary(MI, MBB, SystemZ::XR); 10041 case SystemZ::ATOMIC_LOADW_XILF: 10042 return emitAtomicLoadBinary(MI, MBB, SystemZ::XILF); 10043 10044 case SystemZ::ATOMIC_LOADW_NRi: 10045 return emitAtomicLoadBinary(MI, MBB, SystemZ::NR, true); 10046 case SystemZ::ATOMIC_LOADW_NILHi: 10047 return emitAtomicLoadBinary(MI, MBB, SystemZ::NILH, true); 10048 10049 case SystemZ::ATOMIC_LOADW_MIN: 10050 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_LE); 10051 case SystemZ::ATOMIC_LOADW_MAX: 10052 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CR, SystemZ::CCMASK_CMP_GE); 10053 case SystemZ::ATOMIC_LOADW_UMIN: 10054 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_LE); 10055 case SystemZ::ATOMIC_LOADW_UMAX: 10056 return emitAtomicLoadMinMax(MI, MBB, SystemZ::CLR, SystemZ::CCMASK_CMP_GE); 10057 10058 case SystemZ::ATOMIC_CMP_SWAPW: 10059 return emitAtomicCmpSwapW(MI, MBB); 10060 case SystemZ::MVCImm: 10061 case SystemZ::MVCReg: 10062 return emitMemMemWrapper(MI, MBB, SystemZ::MVC); 10063 case SystemZ::NCImm: 10064 return emitMemMemWrapper(MI, MBB, SystemZ::NC); 10065 case SystemZ::OCImm: 10066 return emitMemMemWrapper(MI, MBB, SystemZ::OC); 10067 case SystemZ::XCImm: 10068 case SystemZ::XCReg: 10069 return emitMemMemWrapper(MI, MBB, SystemZ::XC); 10070 case SystemZ::CLCImm: 10071 case SystemZ::CLCReg: 10072 return emitMemMemWrapper(MI, MBB, SystemZ::CLC); 10073 case SystemZ::MemsetImmImm: 10074 case SystemZ::MemsetImmReg: 10075 case SystemZ::MemsetRegImm: 10076 case SystemZ::MemsetRegReg: 10077 return emitMemMemWrapper(MI, MBB, SystemZ::MVC, true/*IsMemset*/); 10078 case SystemZ::CLSTLoop: 10079 
return emitStringWrapper(MI, MBB, SystemZ::CLST); 10080 case SystemZ::MVSTLoop: 10081 return emitStringWrapper(MI, MBB, SystemZ::MVST); 10082 case SystemZ::SRSTLoop: 10083 return emitStringWrapper(MI, MBB, SystemZ::SRST); 10084 case SystemZ::TBEGIN: 10085 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); 10086 case SystemZ::TBEGIN_nofloat: 10087 return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); 10088 case SystemZ::TBEGINC: 10089 return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); 10090 case SystemZ::LTEBRCompare_Pseudo: 10091 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTEBR); 10092 case SystemZ::LTDBRCompare_Pseudo: 10093 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTDBR); 10094 case SystemZ::LTXBRCompare_Pseudo: 10095 return emitLoadAndTestCmp0(MI, MBB, SystemZ::LTXBR); 10096 10097 case SystemZ::PROBED_ALLOCA: 10098 return emitProbedAlloca(MI, MBB); 10099 case SystemZ::EH_SjLj_SetJmp: 10100 return emitEHSjLjSetJmp(MI, MBB); 10101 case SystemZ::EH_SjLj_LongJmp: 10102 return emitEHSjLjLongJmp(MI, MBB); 10103 10104 case TargetOpcode::STACKMAP: 10105 case TargetOpcode::PATCHPOINT: 10106 return emitPatchPoint(MI, MBB); 10107 10108 default: 10109 llvm_unreachable("Unexpected instr type to insert"); 10110 } 10111 } 10112 10113 // This is only used by the isel schedulers, and is needed only to prevent 10114 // compiler from crashing when list-ilp is used. 10115 const TargetRegisterClass * 10116 SystemZTargetLowering::getRepRegClassFor(MVT VT) const { 10117 if (VT == MVT::Untyped) 10118 return &SystemZ::ADDR128BitRegClass; 10119 return TargetLowering::getRepRegClassFor(VT); 10120 } 10121 10122 SDValue SystemZTargetLowering::lowerGET_ROUNDING(SDValue Op, 10123 SelectionDAG &DAG) const { 10124 SDLoc dl(Op); 10125 /* 10126 The rounding method is in FPC Byte 3 bits 6-7, and has the following 10127 settings: 10128 00 Round to nearest 10129 01 Round to 0 10130 10 Round to +inf 10131 11 Round to -inf 10132 10133 FLT_ROUNDS, on the other hand, expects the following: 10134 -1 Undefined 10135 0 Round to 0 10136 1 Round to nearest 10137 2 Round to +inf 10138 3 Round to -inf 10139 */ 10140 10141 // Save FPC to register. 10142 SDValue Chain = Op.getOperand(0); 10143 SDValue EFPC( 10144 DAG.getMachineNode(SystemZ::EFPC, dl, {MVT::i32, MVT::Other}, Chain), 0); 10145 Chain = EFPC.getValue(1); 10146 10147 // Transform as necessary 10148 SDValue CWD1 = DAG.getNode(ISD::AND, dl, MVT::i32, EFPC, 10149 DAG.getConstant(3, dl, MVT::i32)); 10150 // RetVal = (CWD1 ^ (CWD1 >> 1)) ^ 1 10151 SDValue CWD2 = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, 10152 DAG.getNode(ISD::SRL, dl, MVT::i32, CWD1, 10153 DAG.getConstant(1, dl, MVT::i32))); 10154 10155 SDValue RetVal = DAG.getNode(ISD::XOR, dl, MVT::i32, CWD2, 10156 DAG.getConstant(1, dl, MVT::i32)); 10157 RetVal = DAG.getZExtOrTrunc(RetVal, dl, Op.getValueType()); 10158 10159 return DAG.getMergeValues({RetVal, Chain}, dl); 10160 } 10161 10162 SDValue SystemZTargetLowering::lowerVECREDUCE_ADD(SDValue Op, 10163 SelectionDAG &DAG) const { 10164 EVT VT = Op.getValueType(); 10165 Op = Op.getOperand(0); 10166 EVT OpVT = Op.getValueType(); 10167 10168 assert(OpVT.isVector() && "Operand type for VECREDUCE_ADD is not a vector."); 10169 10170 SDLoc DL(Op); 10171 10172 // load a 0 vector for the third operand of VSUM. 10173 SDValue Zero = DAG.getSplatBuildVector(OpVT, DL, DAG.getConstant(0, DL, VT)); 10174 10175 // execute VSUM. 
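  // For illustration (a sketch of the reduction chain, not verbatim nodes):
  // a v16i8 input is first reduced to four i32 partial sums (VSUM with a
  // v4i32 result) and then summed into a single i128 (VSUM with an i128
  // result); v4i32 and v2i64 inputs go straight to the i128 sum. The result
  // is bitcast back to the input vector type below and its final element,
  // which holds the total, is extracted.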
10176 switch (OpVT.getScalarSizeInBits()) { 10177 case 8: 10178 case 16: 10179 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Zero); 10180 [[fallthrough]]; 10181 case 32: 10182 case 64: 10183 Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::i128, Op, 10184 DAG.getBitcast(Op.getValueType(), Zero)); 10185 break; 10186 case 128: 10187 break; // VSUM over v1i128 should not happen and would be a noop 10188 default: 10189 llvm_unreachable("Unexpected scalar size."); 10190 } 10191 // Cast to original vector type, retrieve last element. 10192 return DAG.getNode( 10193 ISD::EXTRACT_VECTOR_ELT, DL, VT, DAG.getBitcast(OpVT, Op), 10194 DAG.getConstant(OpVT.getVectorNumElements() - 1, DL, MVT::i32)); 10195 } 10196 10197 // Only consider a function fully internal as long as it has local linkage 10198 // and is not used in any other way than acting as the called function at 10199 // call sites. 10200 bool SystemZTargetLowering::isFullyInternal(const Function *Fn) const { 10201 if (!Fn->hasLocalLinkage()) 10202 return false; 10203 for (const User *U : Fn->users()) { 10204 if (auto *CB = dyn_cast<CallBase>(U)) { 10205 if (CB->getCalledFunction() != Fn) 10206 return false; 10207 } else 10208 return false; 10209 } 10210 return true; 10211 } 10212 10213 static void printFunctionArgExts(const Function *F, raw_fd_ostream &OS) { 10214 FunctionType *FT = F->getFunctionType(); 10215 const AttributeList &Attrs = F->getAttributes(); 10216 if (Attrs.hasRetAttrs()) 10217 OS << Attrs.getAsString(AttributeList::ReturnIndex) << " "; 10218 OS << *F->getReturnType() << " @" << F->getName() << "("; 10219 for (unsigned I = 0, E = FT->getNumParams(); I != E; ++I) { 10220 if (I) 10221 OS << ", "; 10222 OS << *FT->getParamType(I); 10223 AttributeSet ArgAttrs = Attrs.getParamAttrs(I); 10224 for (auto A : {Attribute::SExt, Attribute::ZExt, Attribute::NoExt}) 10225 if (ArgAttrs.hasAttribute(A)) 10226 OS << " " << Attribute::getNameFromAttrKind(A); 10227 } 10228 OS << ")\n"; 10229 } 10230 10231 void SystemZTargetLowering:: 10232 verifyNarrowIntegerArgs_Call(const SmallVectorImpl<ISD::OutputArg> &Outs, 10233 const Function *F, SDValue Callee) const { 10234 bool IsInternal = false; 10235 const Function *CalleeFn = nullptr; 10236 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) 10237 if ((CalleeFn = dyn_cast<Function>(G->getGlobal()))) 10238 IsInternal = isFullyInternal(CalleeFn); 10239 if (!verifyNarrowIntegerArgs(Outs, IsInternal)) { 10240 errs() << "ERROR: Missing extension attribute of passed " 10241 << "value in call to function:\n" << "Callee: "; 10242 if (CalleeFn != nullptr) 10243 printFunctionArgExts(CalleeFn, errs()); 10244 else 10245 errs() << "-\n"; 10246 errs() << "Caller: "; 10247 printFunctionArgExts(F, errs()); 10248 llvm_unreachable(""); 10249 } 10250 } 10251 10252 void SystemZTargetLowering:: 10253 verifyNarrowIntegerArgs_Ret(const SmallVectorImpl<ISD::OutputArg> &Outs, 10254 const Function *F) const { 10255 if (!verifyNarrowIntegerArgs(Outs, isFullyInternal(F))) { 10256 errs() << "ERROR: Missing extension attribute of returned " 10257 << "value from function:\n"; 10258 printFunctionArgExts(F, errs()); 10259 llvm_unreachable(""); 10260 } 10261 } 10262 10263 // Verify that narrow integer arguments are extended as required by the ABI. 10264 // Return false if an error is found. 
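// For illustration (hypothetical IR, not taken from any test): a call whose
// callee is declared as
//   declare i32 @foo(i32 signext %x)
// passes an i32 argument with an extension flag and satisfies the check,
// while a callee declared as
//   declare i32 @foo(i32 %x)
// leaves the argument without sext/zext/noext flags and is reported as an
// ABI violation (unless the callee is fully internal or the check is
// disabled).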
10265 bool SystemZTargetLowering::
10266 verifyNarrowIntegerArgs(const SmallVectorImpl<ISD::OutputArg> &Outs,
10267                         bool IsInternal) const {
10268   if (IsInternal || !Subtarget.isTargetELF())
10269     return true;
10270 
10271   // Temporarily only do the check when explicitly requested or when
10272   // VerifyArgABICompliance is set, until it can be enabled by default.
10276   if (EnableIntArgExtCheck.getNumOccurrences()) {
10277     if (!EnableIntArgExtCheck)
10278       return true;
10279   } else if (!getTargetMachine().Options.VerifyArgABICompliance)
10280     return true;
10281 
10282   for (unsigned i = 0; i < Outs.size(); ++i) {
10283     MVT VT = Outs[i].VT;
10284     ISD::ArgFlagsTy Flags = Outs[i].Flags;
10285     if (VT.isInteger()) {
10286       assert((VT == MVT::i32 || VT.getSizeInBits() >= 64) &&
10287              "Unexpected integer argument VT.");
10288       if (VT == MVT::i32 &&
10289           !Flags.isSExt() && !Flags.isZExt() && !Flags.isNoExt())
10290         return false;
10291     }
10292   }
10293 
10294   return true;
10295 }
10296 