1 //===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This implements routines for translating from LLVM IR into SelectionDAG IR. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "SelectionDAGBuilder.h" 15 #include "SDNodeDbgValue.h" 16 #include "llvm/ADT/BitVector.h" 17 #include "llvm/ADT/Optional.h" 18 #include "llvm/ADT/SmallSet.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/Analysis/AliasAnalysis.h" 21 #include "llvm/Analysis/BranchProbabilityInfo.h" 22 #include "llvm/Analysis/ConstantFolding.h" 23 #include "llvm/Analysis/TargetLibraryInfo.h" 24 #include "llvm/Analysis/ValueTracking.h" 25 #include "llvm/CodeGen/Analysis.h" 26 #include "llvm/CodeGen/FastISel.h" 27 #include "llvm/CodeGen/FunctionLoweringInfo.h" 28 #include "llvm/CodeGen/GCMetadata.h" 29 #include "llvm/CodeGen/GCStrategy.h" 30 #include "llvm/CodeGen/MachineFrameInfo.h" 31 #include "llvm/CodeGen/MachineFunction.h" 32 #include "llvm/CodeGen/MachineInstrBuilder.h" 33 #include "llvm/CodeGen/MachineJumpTableInfo.h" 34 #include "llvm/CodeGen/MachineModuleInfo.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h" 36 #include "llvm/CodeGen/SelectionDAG.h" 37 #include "llvm/CodeGen/StackMaps.h" 38 #include "llvm/CodeGen/WinEHFuncInfo.h" 39 #include "llvm/IR/CallingConv.h" 40 #include "llvm/IR/Constants.h" 41 #include "llvm/IR/DataLayout.h" 42 #include "llvm/IR/DebugInfo.h" 43 #include "llvm/IR/DerivedTypes.h" 44 #include "llvm/IR/Function.h" 45 #include "llvm/IR/GlobalVariable.h" 46 #include "llvm/IR/InlineAsm.h" 47 #include "llvm/IR/Instructions.h" 48 #include "llvm/IR/IntrinsicInst.h" 49 #include "llvm/IR/Intrinsics.h" 50 #include "llvm/IR/LLVMContext.h" 51 #include "llvm/IR/Module.h" 52 #include "llvm/IR/Statepoint.h" 53 #include "llvm/MC/MCSymbol.h" 54 #include "llvm/Support/CommandLine.h" 55 #include "llvm/Support/Debug.h" 56 #include "llvm/Support/ErrorHandling.h" 57 #include "llvm/Support/MathExtras.h" 58 #include "llvm/Support/raw_ostream.h" 59 #include "llvm/Target/TargetFrameLowering.h" 60 #include "llvm/Target/TargetInstrInfo.h" 61 #include "llvm/Target/TargetIntrinsicInfo.h" 62 #include "llvm/Target/TargetLowering.h" 63 #include "llvm/Target/TargetOptions.h" 64 #include "llvm/Target/TargetSelectionDAGInfo.h" 65 #include "llvm/Target/TargetSubtargetInfo.h" 66 #include <algorithm> 67 using namespace llvm; 68 69 #define DEBUG_TYPE "isel" 70 71 /// LimitFloatPrecision - Generate low-precision inline sequences for 72 /// some float libcalls (6, 8 or 12 bits). 73 static unsigned LimitFloatPrecision; 74 75 static cl::opt<unsigned, true> 76 LimitFPPrecision("limit-float-precision", 77 cl::desc("Generate low-precision inline sequences " 78 "for some float libcalls"), 79 cl::location(LimitFloatPrecision), 80 cl::init(0)); 81 82 // Limit the width of DAG chains. This is important in general to prevent 83 // prevent DAG-based analysis from blowing up. For example, alias analysis and 84 // load clustering may not complete in reasonable time. It is difficult to 85 // recognize and avoid this situation within each individual analysis, and 86 // future analyses are likely to have the same behavior. Limiting DAG width is 87 // the safe approach, and will be especially important with global DAGs. 88 // 89 // MaxParallelChains default is arbitrarily high to avoid affecting 90 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st 91 // sequence over this should have been converted to llvm.memcpy by the 92 // frontend. It easy to induce this behavior with .ll code such as: 93 // %buffer = alloca [4096 x i8] 94 // %data = load [4096 x i8]* %argPtr 95 // store [4096 x i8] %data, [4096 x i8]* %buffer 96 static const unsigned MaxParallelChains = 64; 97 98 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, 99 const SDValue *Parts, unsigned NumParts, 100 MVT PartVT, EVT ValueVT, const Value *V); 101 102 /// getCopyFromParts - Create a value that contains the specified legal parts 103 /// combined into the value they represent. If the parts combine to a type 104 /// larger then ValueVT then AssertOp can be used to specify whether the extra 105 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT 106 /// (ISD::AssertSext). 107 static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, 108 const SDValue *Parts, 109 unsigned NumParts, MVT PartVT, EVT ValueVT, 110 const Value *V, 111 ISD::NodeType AssertOp = ISD::DELETED_NODE) { 112 if (ValueVT.isVector()) 113 return getCopyFromPartsVector(DAG, DL, Parts, NumParts, 114 PartVT, ValueVT, V); 115 116 assert(NumParts > 0 && "No parts to assemble!"); 117 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 118 SDValue Val = Parts[0]; 119 120 if (NumParts > 1) { 121 // Assemble the value from multiple parts. 122 if (ValueVT.isInteger()) { 123 unsigned PartBits = PartVT.getSizeInBits(); 124 unsigned ValueBits = ValueVT.getSizeInBits(); 125 126 // Assemble the power of 2 part. 127 unsigned RoundParts = NumParts & (NumParts - 1) ? 128 1 << Log2_32(NumParts) : NumParts; 129 unsigned RoundBits = PartBits * RoundParts; 130 EVT RoundVT = RoundBits == ValueBits ? 131 ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits); 132 SDValue Lo, Hi; 133 134 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2); 135 136 if (RoundParts > 2) { 137 Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2, 138 PartVT, HalfVT, V); 139 Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2, 140 RoundParts / 2, PartVT, HalfVT, V); 141 } else { 142 Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]); 143 Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]); 144 } 145 146 if (TLI.isBigEndian()) 147 std::swap(Lo, Hi); 148 149 Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi); 150 151 if (RoundParts < NumParts) { 152 // Assemble the trailing non-power-of-2 part. 153 unsigned OddParts = NumParts - RoundParts; 154 EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits); 155 Hi = getCopyFromParts(DAG, DL, 156 Parts + RoundParts, OddParts, PartVT, OddVT, V); 157 158 // Combine the round and odd parts. 159 Lo = Val; 160 if (TLI.isBigEndian()) 161 std::swap(Lo, Hi); 162 EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 163 Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi); 164 Hi = DAG.getNode(ISD::SHL, DL, TotalVT, Hi, 165 DAG.getConstant(Lo.getValueType().getSizeInBits(), 166 TLI.getPointerTy())); 167 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo); 168 Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi); 169 } 170 } else if (PartVT.isFloatingPoint()) { 171 // FP split into multiple FP parts (for ppcf128) 172 assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && 173 "Unexpected split"); 174 SDValue Lo, Hi; 175 Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); 176 Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); 177 if (TLI.hasBigEndianPartOrdering(ValueVT)) 178 std::swap(Lo, Hi); 179 Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); 180 } else { 181 // FP split into integer parts (soft fp) 182 assert(ValueVT.isFloatingPoint() && PartVT.isInteger() && 183 !PartVT.isVector() && "Unexpected split"); 184 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits()); 185 Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V); 186 } 187 } 188 189 // There is now one part, held in Val. Correct it to match ValueVT. 190 EVT PartEVT = Val.getValueType(); 191 192 if (PartEVT == ValueVT) 193 return Val; 194 195 if (PartEVT.isInteger() && ValueVT.isInteger()) { 196 if (ValueVT.bitsLT(PartEVT)) { 197 // For a truncate, see if we have any information to 198 // indicate whether the truncated bits will always be 199 // zero or sign-extension. 200 if (AssertOp != ISD::DELETED_NODE) 201 Val = DAG.getNode(AssertOp, DL, PartEVT, Val, 202 DAG.getValueType(ValueVT)); 203 return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 204 } 205 return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); 206 } 207 208 if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { 209 // FP_ROUND's are always exact here. 210 if (ValueVT.bitsLT(Val.getValueType())) 211 return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, 212 DAG.getTargetConstant(1, TLI.getPointerTy())); 213 214 return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); 215 } 216 217 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) 218 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 219 220 llvm_unreachable("Unknown mismatch!"); 221 } 222 223 static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V, 224 const Twine &ErrMsg) { 225 const Instruction *I = dyn_cast_or_null<Instruction>(V); 226 if (!V) 227 return Ctx.emitError(ErrMsg); 228 229 const char *AsmError = ", possible invalid constraint for vector type"; 230 if (const CallInst *CI = dyn_cast<CallInst>(I)) 231 if (isa<InlineAsm>(CI->getCalledValue())) 232 return Ctx.emitError(I, ErrMsg + AsmError); 233 234 return Ctx.emitError(I, ErrMsg); 235 } 236 237 /// getCopyFromPartsVector - Create a value that contains the specified legal 238 /// parts combined into the value they represent. If the parts combine to a 239 /// type larger then ValueVT then AssertOp can be used to specify whether the 240 /// extra bits are known to be zero (ISD::AssertZext) or sign extended from 241 /// ValueVT (ISD::AssertSext). 242 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, 243 const SDValue *Parts, unsigned NumParts, 244 MVT PartVT, EVT ValueVT, const Value *V) { 245 assert(ValueVT.isVector() && "Not a vector value"); 246 assert(NumParts > 0 && "No parts to assemble!"); 247 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 248 SDValue Val = Parts[0]; 249 250 // Handle a multi-element vector. 251 if (NumParts > 1) { 252 EVT IntermediateVT; 253 MVT RegisterVT; 254 unsigned NumIntermediates; 255 unsigned NumRegs = 256 TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, 257 NumIntermediates, RegisterVT); 258 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); 259 NumParts = NumRegs; // Silence a compiler warning. 260 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); 261 assert(RegisterVT == Parts[0].getSimpleValueType() && 262 "Part type doesn't match part!"); 263 264 // Assemble the parts into intermediate operands. 265 SmallVector<SDValue, 8> Ops(NumIntermediates); 266 if (NumIntermediates == NumParts) { 267 // If the register was not expanded, truncate or copy the value, 268 // as appropriate. 269 for (unsigned i = 0; i != NumParts; ++i) 270 Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1, 271 PartVT, IntermediateVT, V); 272 } else if (NumParts > 0) { 273 // If the intermediate type was expanded, build the intermediate 274 // operands from the parts. 275 assert(NumParts % NumIntermediates == 0 && 276 "Must expand into a divisible number of parts!"); 277 unsigned Factor = NumParts / NumIntermediates; 278 for (unsigned i = 0; i != NumIntermediates; ++i) 279 Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor, 280 PartVT, IntermediateVT, V); 281 } 282 283 // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the 284 // intermediate operands. 285 Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS 286 : ISD::BUILD_VECTOR, 287 DL, ValueVT, Ops); 288 } 289 290 // There is now one part, held in Val. Correct it to match ValueVT. 291 EVT PartEVT = Val.getValueType(); 292 293 if (PartEVT == ValueVT) 294 return Val; 295 296 if (PartEVT.isVector()) { 297 // If the element type of the source/dest vectors are the same, but the 298 // parts vector has more elements than the value vector, then we have a 299 // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the 300 // elements we want. 301 if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { 302 assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && 303 "Cannot narrow, it would be a lossy transformation"); 304 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, 305 DAG.getConstant(0, TLI.getVectorIdxTy())); 306 } 307 308 // Vector/Vector bitcast. 309 if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) 310 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 311 312 assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && 313 "Cannot handle this kind of promotion"); 314 // Promoted vector extract 315 bool Smaller = ValueVT.bitsLE(PartEVT); 316 return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 317 DL, ValueVT, Val); 318 319 } 320 321 // Trivial bitcast if the types are the same size and the destination 322 // vector type is legal. 323 if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && 324 TLI.isTypeLegal(ValueVT)) 325 return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); 326 327 // Handle cases such as i8 -> <1 x i1> 328 if (ValueVT.getVectorNumElements() != 1) { 329 diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, 330 "non-trivial scalar-to-vector conversion"); 331 return DAG.getUNDEF(ValueVT); 332 } 333 334 if (ValueVT.getVectorNumElements() == 1 && 335 ValueVT.getVectorElementType() != PartEVT) { 336 bool Smaller = ValueVT.bitsLE(PartEVT); 337 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 338 DL, ValueVT.getScalarType(), Val); 339 } 340 341 return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); 342 } 343 344 static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, 345 SDValue Val, SDValue *Parts, unsigned NumParts, 346 MVT PartVT, const Value *V); 347 348 /// getCopyToParts - Create a series of nodes that contain the specified value 349 /// split into legal parts. If the parts contain more bits than Val, then, for 350 /// integers, ExtendKind can be used to specify how to generate the extra bits. 351 static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, 352 SDValue Val, SDValue *Parts, unsigned NumParts, 353 MVT PartVT, const Value *V, 354 ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { 355 EVT ValueVT = Val.getValueType(); 356 357 // Handle the vector case separately. 358 if (ValueVT.isVector()) 359 return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V); 360 361 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 362 unsigned PartBits = PartVT.getSizeInBits(); 363 unsigned OrigNumParts = NumParts; 364 assert(TLI.isTypeLegal(PartVT) && "Copying to an illegal type!"); 365 366 if (NumParts == 0) 367 return; 368 369 assert(!ValueVT.isVector() && "Vector case handled elsewhere"); 370 EVT PartEVT = PartVT; 371 if (PartEVT == ValueVT) { 372 assert(NumParts == 1 && "No-op copy with multiple parts!"); 373 Parts[0] = Val; 374 return; 375 } 376 377 if (NumParts * PartBits > ValueVT.getSizeInBits()) { 378 // If the parts cover more bits than the value has, promote the value. 379 if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { 380 assert(NumParts == 1 && "Do not know what to promote to!"); 381 Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val); 382 } else { 383 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && 384 ValueVT.isInteger() && 385 "Unknown mismatch!"); 386 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 387 Val = DAG.getNode(ExtendKind, DL, ValueVT, Val); 388 if (PartVT == MVT::x86mmx) 389 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 390 } 391 } else if (PartBits == ValueVT.getSizeInBits()) { 392 // Different types of the same size. 393 assert(NumParts == 1 && PartEVT != ValueVT); 394 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 395 } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { 396 // If the parts cover less bits than value has, truncate the value. 397 assert((PartVT.isInteger() || PartVT == MVT::x86mmx) && 398 ValueVT.isInteger() && 399 "Unknown mismatch!"); 400 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 401 Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 402 if (PartVT == MVT::x86mmx) 403 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 404 } 405 406 // The value may have changed - recompute ValueVT. 407 ValueVT = Val.getValueType(); 408 assert(NumParts * PartBits == ValueVT.getSizeInBits() && 409 "Failed to tile the value with PartVT!"); 410 411 if (NumParts == 1) { 412 if (PartEVT != ValueVT) 413 diagnosePossiblyInvalidConstraint(*DAG.getContext(), V, 414 "scalar-to-vector conversion failed"); 415 416 Parts[0] = Val; 417 return; 418 } 419 420 // Expand the value into multiple parts. 421 if (NumParts & (NumParts - 1)) { 422 // The number of parts is not a power of 2. Split off and copy the tail. 423 assert(PartVT.isInteger() && ValueVT.isInteger() && 424 "Do not know what to expand to!"); 425 unsigned RoundParts = 1 << Log2_32(NumParts); 426 unsigned RoundBits = RoundParts * PartBits; 427 unsigned OddParts = NumParts - RoundParts; 428 SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val, 429 DAG.getIntPtrConstant(RoundBits)); 430 getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V); 431 432 if (TLI.isBigEndian()) 433 // The odd parts were reversed by getCopyToParts - unreverse them. 434 std::reverse(Parts + RoundParts, Parts + NumParts); 435 436 NumParts = RoundParts; 437 ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits); 438 Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); 439 } 440 441 // The number of parts is a power of 2. Repeatedly bisect the value using 442 // EXTRACT_ELEMENT. 443 Parts[0] = DAG.getNode(ISD::BITCAST, DL, 444 EVT::getIntegerVT(*DAG.getContext(), 445 ValueVT.getSizeInBits()), 446 Val); 447 448 for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) { 449 for (unsigned i = 0; i < NumParts; i += StepSize) { 450 unsigned ThisBits = StepSize * PartBits / 2; 451 EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits); 452 SDValue &Part0 = Parts[i]; 453 SDValue &Part1 = Parts[i+StepSize/2]; 454 455 Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, 456 ThisVT, Part0, DAG.getIntPtrConstant(1)); 457 Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, 458 ThisVT, Part0, DAG.getIntPtrConstant(0)); 459 460 if (ThisBits == PartBits && ThisVT != PartVT) { 461 Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0); 462 Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1); 463 } 464 } 465 } 466 467 if (TLI.isBigEndian()) 468 std::reverse(Parts, Parts + OrigNumParts); 469 } 470 471 472 /// getCopyToPartsVector - Create a series of nodes that contain the specified 473 /// value split into legal parts. 474 static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, 475 SDValue Val, SDValue *Parts, unsigned NumParts, 476 MVT PartVT, const Value *V) { 477 EVT ValueVT = Val.getValueType(); 478 assert(ValueVT.isVector() && "Not a vector"); 479 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 480 481 if (NumParts == 1) { 482 EVT PartEVT = PartVT; 483 if (PartEVT == ValueVT) { 484 // Nothing to do. 485 } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { 486 // Bitconvert vector->vector case. 487 Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); 488 } else if (PartVT.isVector() && 489 PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && 490 PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { 491 EVT ElementVT = PartVT.getVectorElementType(); 492 // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in 493 // undef elements. 494 SmallVector<SDValue, 16> Ops; 495 for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) 496 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 497 ElementVT, Val, DAG.getConstant(i, 498 TLI.getVectorIdxTy()))); 499 500 for (unsigned i = ValueVT.getVectorNumElements(), 501 e = PartVT.getVectorNumElements(); i != e; ++i) 502 Ops.push_back(DAG.getUNDEF(ElementVT)); 503 504 Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, Ops); 505 506 // FIXME: Use CONCAT for 2x -> 4x. 507 508 //SDValue UndefElts = DAG.getUNDEF(VectorTy); 509 //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); 510 } else if (PartVT.isVector() && 511 PartEVT.getVectorElementType().bitsGE( 512 ValueVT.getVectorElementType()) && 513 PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { 514 515 // Promoted vector extract 516 bool Smaller = PartEVT.bitsLE(ValueVT); 517 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 518 DL, PartVT, Val); 519 } else{ 520 // Vector -> scalar conversion. 521 assert(ValueVT.getVectorNumElements() == 1 && 522 "Only trivial vector-to-scalar conversions should get here!"); 523 Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 524 PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); 525 526 bool Smaller = ValueVT.bitsLE(PartVT); 527 Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), 528 DL, PartVT, Val); 529 } 530 531 Parts[0] = Val; 532 return; 533 } 534 535 // Handle a multi-element vector. 536 EVT IntermediateVT; 537 MVT RegisterVT; 538 unsigned NumIntermediates; 539 unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, 540 IntermediateVT, 541 NumIntermediates, RegisterVT); 542 unsigned NumElements = ValueVT.getVectorNumElements(); 543 544 assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); 545 NumParts = NumRegs; // Silence a compiler warning. 546 assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); 547 548 // Split the vector into intermediate operands. 549 SmallVector<SDValue, 8> Ops(NumIntermediates); 550 for (unsigned i = 0; i != NumIntermediates; ++i) { 551 if (IntermediateVT.isVector()) 552 Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, 553 IntermediateVT, Val, 554 DAG.getConstant(i * (NumElements / NumIntermediates), 555 TLI.getVectorIdxTy())); 556 else 557 Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 558 IntermediateVT, Val, 559 DAG.getConstant(i, TLI.getVectorIdxTy())); 560 } 561 562 // Split the intermediate operands into legal parts. 563 if (NumParts == NumIntermediates) { 564 // If the register was not expanded, promote or copy the value, 565 // as appropriate. 566 for (unsigned i = 0; i != NumParts; ++i) 567 getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V); 568 } else if (NumParts > 0) { 569 // If the intermediate type was expanded, split each the value into 570 // legal parts. 571 assert(NumIntermediates != 0 && "division by zero"); 572 assert(NumParts % NumIntermediates == 0 && 573 "Must expand into a divisible number of parts!"); 574 unsigned Factor = NumParts / NumIntermediates; 575 for (unsigned i = 0; i != NumIntermediates; ++i) 576 getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V); 577 } 578 } 579 580 namespace { 581 /// RegsForValue - This struct represents the registers (physical or virtual) 582 /// that a particular set of values is assigned, and the type information 583 /// about the value. The most common situation is to represent one value at a 584 /// time, but struct or array values are handled element-wise as multiple 585 /// values. The splitting of aggregates is performed recursively, so that we 586 /// never have aggregate-typed registers. The values at this point do not 587 /// necessarily have legal types, so each value may require one or more 588 /// registers of some legal type. 589 /// 590 struct RegsForValue { 591 /// ValueVTs - The value types of the values, which may not be legal, and 592 /// may need be promoted or synthesized from one or more registers. 593 /// 594 SmallVector<EVT, 4> ValueVTs; 595 596 /// RegVTs - The value types of the registers. This is the same size as 597 /// ValueVTs and it records, for each value, what the type of the assigned 598 /// register or registers are. (Individual values are never synthesized 599 /// from more than one type of register.) 600 /// 601 /// With virtual registers, the contents of RegVTs is redundant with TLI's 602 /// getRegisterType member function, however when with physical registers 603 /// it is necessary to have a separate record of the types. 604 /// 605 SmallVector<MVT, 4> RegVTs; 606 607 /// Regs - This list holds the registers assigned to the values. 608 /// Each legal or promoted value requires one register, and each 609 /// expanded value requires multiple registers. 610 /// 611 SmallVector<unsigned, 4> Regs; 612 613 RegsForValue() {} 614 615 RegsForValue(const SmallVector<unsigned, 4> ®s, 616 MVT regvt, EVT valuevt) 617 : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} 618 619 RegsForValue(LLVMContext &Context, const TargetLowering &tli, 620 unsigned Reg, Type *Ty) { 621 ComputeValueVTs(tli, Ty, ValueVTs); 622 623 for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { 624 EVT ValueVT = ValueVTs[Value]; 625 unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); 626 MVT RegisterVT = tli.getRegisterType(Context, ValueVT); 627 for (unsigned i = 0; i != NumRegs; ++i) 628 Regs.push_back(Reg + i); 629 RegVTs.push_back(RegisterVT); 630 Reg += NumRegs; 631 } 632 } 633 634 /// append - Add the specified values to this one. 635 void append(const RegsForValue &RHS) { 636 ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); 637 RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); 638 Regs.append(RHS.Regs.begin(), RHS.Regs.end()); 639 } 640 641 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from 642 /// this value and returns the result as a ValueVTs value. This uses 643 /// Chain/Flag as the input and updates them for the output Chain/Flag. 644 /// If the Flag pointer is NULL, no flag is used. 645 SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, 646 SDLoc dl, 647 SDValue &Chain, SDValue *Flag, 648 const Value *V = nullptr) const; 649 650 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the 651 /// specified value into the registers specified by this object. This uses 652 /// Chain/Flag as the input and updates them for the output Chain/Flag. 653 /// If the Flag pointer is NULL, no flag is used. 654 void 655 getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, 656 SDValue *Flag, const Value *V, 657 ISD::NodeType PreferredExtendType = ISD::ANY_EXTEND) const; 658 659 /// AddInlineAsmOperands - Add this value to the specified inlineasm node 660 /// operand list. This adds the code marker, matching input operand index 661 /// (if applicable), and includes the number of values added into it. 662 void AddInlineAsmOperands(unsigned Kind, 663 bool HasMatching, unsigned MatchingIdx, 664 SelectionDAG &DAG, 665 std::vector<SDValue> &Ops) const; 666 }; 667 } 668 669 /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from 670 /// this value and returns the result as a ValueVT value. This uses 671 /// Chain/Flag as the input and updates them for the output Chain/Flag. 672 /// If the Flag pointer is NULL, no flag is used. 673 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, 674 FunctionLoweringInfo &FuncInfo, 675 SDLoc dl, 676 SDValue &Chain, SDValue *Flag, 677 const Value *V) const { 678 // A Value with type {} or [0 x %t] needs no registers. 679 if (ValueVTs.empty()) 680 return SDValue(); 681 682 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 683 684 // Assemble the legal parts into the final values. 685 SmallVector<SDValue, 4> Values(ValueVTs.size()); 686 SmallVector<SDValue, 8> Parts; 687 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { 688 // Copy the legal parts from the registers. 689 EVT ValueVT = ValueVTs[Value]; 690 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); 691 MVT RegisterVT = RegVTs[Value]; 692 693 Parts.resize(NumRegs); 694 for (unsigned i = 0; i != NumRegs; ++i) { 695 SDValue P; 696 if (!Flag) { 697 P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); 698 } else { 699 P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); 700 *Flag = P.getValue(2); 701 } 702 703 Chain = P.getValue(1); 704 Parts[i] = P; 705 706 // If the source register was virtual and if we know something about it, 707 // add an assert node. 708 if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) || 709 !RegisterVT.isInteger() || RegisterVT.isVector()) 710 continue; 711 712 const FunctionLoweringInfo::LiveOutInfo *LOI = 713 FuncInfo.GetLiveOutRegInfo(Regs[Part+i]); 714 if (!LOI) 715 continue; 716 717 unsigned RegSize = RegisterVT.getSizeInBits(); 718 unsigned NumSignBits = LOI->NumSignBits; 719 unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); 720 721 if (NumZeroBits == RegSize) { 722 // The current value is a zero. 723 // Explicitly express that as it would be easier for 724 // optimizations to kick in. 725 Parts[i] = DAG.getConstant(0, RegisterVT); 726 continue; 727 } 728 729 // FIXME: We capture more information than the dag can represent. For 730 // now, just use the tightest assertzext/assertsext possible. 731 bool isSExt = true; 732 EVT FromVT(MVT::Other); 733 if (NumSignBits == RegSize) 734 isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 735 else if (NumZeroBits >= RegSize-1) 736 isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 737 else if (NumSignBits > RegSize-8) 738 isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 739 else if (NumZeroBits >= RegSize-8) 740 isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 741 else if (NumSignBits > RegSize-16) 742 isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 743 else if (NumZeroBits >= RegSize-16) 744 isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 745 else if (NumSignBits > RegSize-32) 746 isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 747 else if (NumZeroBits >= RegSize-32) 748 isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 749 else 750 continue; 751 752 // Add an assertion node. 753 assert(FromVT != MVT::Other); 754 Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, 755 RegisterVT, P, DAG.getValueType(FromVT)); 756 } 757 758 Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), 759 NumRegs, RegisterVT, ValueVT, V); 760 Part += NumRegs; 761 Parts.clear(); 762 } 763 764 return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values); 765 } 766 767 /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the 768 /// specified value into the registers specified by this object. This uses 769 /// Chain/Flag as the input and updates them for the output Chain/Flag. 770 /// If the Flag pointer is NULL, no flag is used. 771 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, 772 SDValue &Chain, SDValue *Flag, const Value *V, 773 ISD::NodeType PreferredExtendType) const { 774 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 775 ISD::NodeType ExtendKind = PreferredExtendType; 776 777 // Get the list of the values's legal parts. 778 unsigned NumRegs = Regs.size(); 779 SmallVector<SDValue, 8> Parts(NumRegs); 780 for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { 781 EVT ValueVT = ValueVTs[Value]; 782 unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); 783 MVT RegisterVT = RegVTs[Value]; 784 785 if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) 786 ExtendKind = ISD::ZERO_EXTEND; 787 788 getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), 789 &Parts[Part], NumParts, RegisterVT, V, ExtendKind); 790 Part += NumParts; 791 } 792 793 // Copy the parts into the registers. 794 SmallVector<SDValue, 8> Chains(NumRegs); 795 for (unsigned i = 0; i != NumRegs; ++i) { 796 SDValue Part; 797 if (!Flag) { 798 Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); 799 } else { 800 Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); 801 *Flag = Part.getValue(1); 802 } 803 804 Chains[i] = Part.getValue(0); 805 } 806 807 if (NumRegs == 1 || Flag) 808 // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is 809 // flagged to it. That is the CopyToReg nodes and the user are considered 810 // a single scheduling unit. If we create a TokenFactor and return it as 811 // chain, then the TokenFactor is both a predecessor (operand) of the 812 // user as well as a successor (the TF operands are flagged to the user). 813 // c1, f1 = CopyToReg 814 // c2, f2 = CopyToReg 815 // c3 = TokenFactor c1, c2 816 // ... 817 // = op c3, ..., f2 818 Chain = Chains[NumRegs-1]; 819 else 820 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains); 821 } 822 823 /// AddInlineAsmOperands - Add this value to the specified inlineasm node 824 /// operand list. This adds the code marker and includes the number of 825 /// values added into it. 826 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, 827 unsigned MatchingIdx, 828 SelectionDAG &DAG, 829 std::vector<SDValue> &Ops) const { 830 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 831 832 unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); 833 if (HasMatching) 834 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); 835 else if (!Regs.empty() && 836 TargetRegisterInfo::isVirtualRegister(Regs.front())) { 837 // Put the register class of the virtual registers in the flag word. That 838 // way, later passes can recompute register class constraints for inline 839 // assembly as well as normal instructions. 840 // Don't do this for tied operands that can use the regclass information 841 // from the def. 842 const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 843 const TargetRegisterClass *RC = MRI.getRegClass(Regs.front()); 844 Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID()); 845 } 846 847 SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); 848 Ops.push_back(Res); 849 850 unsigned SP = TLI.getStackPointerRegisterToSaveRestore(); 851 for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { 852 unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); 853 MVT RegisterVT = RegVTs[Value]; 854 for (unsigned i = 0; i != NumRegs; ++i) { 855 assert(Reg < Regs.size() && "Mismatch in # registers expected"); 856 unsigned TheReg = Regs[Reg++]; 857 Ops.push_back(DAG.getRegister(TheReg, RegisterVT)); 858 859 if (TheReg == SP && Code == InlineAsm::Kind_Clobber) { 860 // If we clobbered the stack pointer, MFI should know about it. 861 assert(DAG.getMachineFunction().getFrameInfo()-> 862 hasInlineAsmWithSPAdjust()); 863 } 864 } 865 } 866 } 867 868 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa, 869 const TargetLibraryInfo *li) { 870 AA = &aa; 871 GFI = gfi; 872 LibInfo = li; 873 DL = DAG.getTarget().getDataLayout(); 874 Context = DAG.getContext(); 875 LPadToCallSiteMap.clear(); 876 } 877 878 /// clear - Clear out the current SelectionDAG and the associated 879 /// state and prepare this SelectionDAGBuilder object to be used 880 /// for a new block. This doesn't clear out information about 881 /// additional blocks that are needed to complete switch lowering 882 /// or PHI node updating; that information is cleared out as it is 883 /// consumed. 884 void SelectionDAGBuilder::clear() { 885 NodeMap.clear(); 886 UnusedArgNodeMap.clear(); 887 PendingLoads.clear(); 888 PendingExports.clear(); 889 CurInst = nullptr; 890 HasTailCall = false; 891 SDNodeOrder = LowestSDNodeOrder; 892 StatepointLowering.clear(); 893 } 894 895 /// clearDanglingDebugInfo - Clear the dangling debug information 896 /// map. This function is separated from the clear so that debug 897 /// information that is dangling in a basic block can be properly 898 /// resolved in a different basic block. This allows the 899 /// SelectionDAG to resolve dangling debug information attached 900 /// to PHI nodes. 901 void SelectionDAGBuilder::clearDanglingDebugInfo() { 902 DanglingDebugInfoMap.clear(); 903 } 904 905 /// getRoot - Return the current virtual root of the Selection DAG, 906 /// flushing any PendingLoad items. This must be done before emitting 907 /// a store or any other node that may need to be ordered after any 908 /// prior load instructions. 909 /// 910 SDValue SelectionDAGBuilder::getRoot() { 911 if (PendingLoads.empty()) 912 return DAG.getRoot(); 913 914 if (PendingLoads.size() == 1) { 915 SDValue Root = PendingLoads[0]; 916 DAG.setRoot(Root); 917 PendingLoads.clear(); 918 return Root; 919 } 920 921 // Otherwise, we have to make a token factor node. 922 SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 923 PendingLoads); 924 PendingLoads.clear(); 925 DAG.setRoot(Root); 926 return Root; 927 } 928 929 /// getControlRoot - Similar to getRoot, but instead of flushing all the 930 /// PendingLoad items, flush all the PendingExports items. It is necessary 931 /// to do this before emitting a terminator instruction. 932 /// 933 SDValue SelectionDAGBuilder::getControlRoot() { 934 SDValue Root = DAG.getRoot(); 935 936 if (PendingExports.empty()) 937 return Root; 938 939 // Turn all of the CopyToReg chains into one factored node. 940 if (Root.getOpcode() != ISD::EntryToken) { 941 unsigned i = 0, e = PendingExports.size(); 942 for (; i != e; ++i) { 943 assert(PendingExports[i].getNode()->getNumOperands() > 1); 944 if (PendingExports[i].getNode()->getOperand(0) == Root) 945 break; // Don't add the root if we already indirectly depend on it. 946 } 947 948 if (i == e) 949 PendingExports.push_back(Root); 950 } 951 952 Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 953 PendingExports); 954 PendingExports.clear(); 955 DAG.setRoot(Root); 956 return Root; 957 } 958 959 void SelectionDAGBuilder::visit(const Instruction &I) { 960 // Set up outgoing PHI node register values before emitting the terminator. 961 if (isa<TerminatorInst>(&I)) 962 HandlePHINodesInSuccessorBlocks(I.getParent()); 963 964 ++SDNodeOrder; 965 966 CurInst = &I; 967 968 visit(I.getOpcode(), I); 969 970 if (!isa<TerminatorInst>(&I) && !HasTailCall) 971 CopyToExportRegsIfNeeded(&I); 972 973 CurInst = nullptr; 974 } 975 976 void SelectionDAGBuilder::visitPHI(const PHINode &) { 977 llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!"); 978 } 979 980 void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { 981 // Note: this doesn't use InstVisitor, because it has to work with 982 // ConstantExpr's in addition to instructions. 983 switch (Opcode) { 984 default: llvm_unreachable("Unknown instruction type encountered!"); 985 // Build the switch statement using the Instruction.def file. 986 #define HANDLE_INST(NUM, OPCODE, CLASS) \ 987 case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; 988 #include "llvm/IR/Instruction.def" 989 } 990 } 991 992 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, 993 // generate the debug data structures now that we've seen its definition. 994 void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, 995 SDValue Val) { 996 DanglingDebugInfo &DDI = DanglingDebugInfoMap[V]; 997 if (DDI.getDI()) { 998 const DbgValueInst *DI = DDI.getDI(); 999 DebugLoc dl = DDI.getdl(); 1000 unsigned DbgSDNodeOrder = DDI.getSDNodeOrder(); 1001 MDLocalVariable *Variable = DI->getVariable(); 1002 MDExpression *Expr = DI->getExpression(); 1003 uint64_t Offset = DI->getOffset(); 1004 // A dbg.value for an alloca is always indirect. 1005 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; 1006 SDDbgValue *SDV; 1007 if (Val.getNode()) { 1008 if (!EmitFuncArgumentDbgValue(V, Variable, Expr, Offset, IsIndirect, 1009 Val)) { 1010 SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(), 1011 IsIndirect, Offset, dl, DbgSDNodeOrder); 1012 DAG.AddDbgValue(SDV, Val.getNode(), false); 1013 } 1014 } else 1015 DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); 1016 DanglingDebugInfoMap[V] = DanglingDebugInfo(); 1017 } 1018 } 1019 1020 /// getCopyFromRegs - If there was virtual register allocated for the value V 1021 /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise. 1022 SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) { 1023 DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V); 1024 SDValue res; 1025 1026 if (It != FuncInfo.ValueMap.end()) { 1027 unsigned InReg = It->second; 1028 RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(), InReg, 1029 Ty); 1030 SDValue Chain = DAG.getEntryNode(); 1031 res = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); 1032 resolveDanglingDebugInfo(V, res); 1033 } 1034 1035 return res; 1036 } 1037 1038 /// getValue - Return an SDValue for the given Value. 1039 SDValue SelectionDAGBuilder::getValue(const Value *V) { 1040 // If we already have an SDValue for this value, use it. It's important 1041 // to do this first, so that we don't create a CopyFromReg if we already 1042 // have a regular SDValue. 1043 SDValue &N = NodeMap[V]; 1044 if (N.getNode()) return N; 1045 1046 // If there's a virtual register allocated and initialized for this 1047 // value, use it. 1048 SDValue copyFromReg = getCopyFromRegs(V, V->getType()); 1049 if (copyFromReg.getNode()) { 1050 return copyFromReg; 1051 } 1052 1053 // Otherwise create a new SDValue and remember it. 1054 SDValue Val = getValueImpl(V); 1055 NodeMap[V] = Val; 1056 resolveDanglingDebugInfo(V, Val); 1057 return Val; 1058 } 1059 1060 /// getNonRegisterValue - Return an SDValue for the given Value, but 1061 /// don't look in FuncInfo.ValueMap for a virtual register. 1062 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { 1063 // If we already have an SDValue for this value, use it. 1064 SDValue &N = NodeMap[V]; 1065 if (N.getNode()) return N; 1066 1067 // Otherwise create a new SDValue and remember it. 1068 SDValue Val = getValueImpl(V); 1069 NodeMap[V] = Val; 1070 resolveDanglingDebugInfo(V, Val); 1071 return Val; 1072 } 1073 1074 /// getValueImpl - Helper function for getValue and getNonRegisterValue. 1075 /// Create an SDValue for the given value. 1076 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { 1077 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1078 1079 if (const Constant *C = dyn_cast<Constant>(V)) { 1080 EVT VT = TLI.getValueType(V->getType(), true); 1081 1082 if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) 1083 return DAG.getConstant(*CI, VT); 1084 1085 if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 1086 return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); 1087 1088 if (isa<ConstantPointerNull>(C)) { 1089 unsigned AS = V->getType()->getPointerAddressSpace(); 1090 return DAG.getConstant(0, TLI.getPointerTy(AS)); 1091 } 1092 1093 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 1094 return DAG.getConstantFP(*CFP, VT); 1095 1096 if (isa<UndefValue>(C) && !V->getType()->isAggregateType()) 1097 return DAG.getUNDEF(VT); 1098 1099 if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { 1100 visit(CE->getOpcode(), *CE); 1101 SDValue N1 = NodeMap[V]; 1102 assert(N1.getNode() && "visit didn't populate the NodeMap!"); 1103 return N1; 1104 } 1105 1106 if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) { 1107 SmallVector<SDValue, 4> Constants; 1108 for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end(); 1109 OI != OE; ++OI) { 1110 SDNode *Val = getValue(*OI).getNode(); 1111 // If the operand is an empty aggregate, there are no values. 1112 if (!Val) continue; 1113 // Add each leaf value from the operand to the Constants list 1114 // to form a flattened list of all the values. 1115 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) 1116 Constants.push_back(SDValue(Val, i)); 1117 } 1118 1119 return DAG.getMergeValues(Constants, getCurSDLoc()); 1120 } 1121 1122 if (const ConstantDataSequential *CDS = 1123 dyn_cast<ConstantDataSequential>(C)) { 1124 SmallVector<SDValue, 4> Ops; 1125 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { 1126 SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode(); 1127 // Add each leaf value from the operand to the Constants list 1128 // to form a flattened list of all the values. 1129 for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i) 1130 Ops.push_back(SDValue(Val, i)); 1131 } 1132 1133 if (isa<ArrayType>(CDS->getType())) 1134 return DAG.getMergeValues(Ops, getCurSDLoc()); 1135 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), 1136 VT, Ops); 1137 } 1138 1139 if (C->getType()->isStructTy() || C->getType()->isArrayTy()) { 1140 assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) && 1141 "Unknown struct or array constant!"); 1142 1143 SmallVector<EVT, 4> ValueVTs; 1144 ComputeValueVTs(TLI, C->getType(), ValueVTs); 1145 unsigned NumElts = ValueVTs.size(); 1146 if (NumElts == 0) 1147 return SDValue(); // empty struct 1148 SmallVector<SDValue, 4> Constants(NumElts); 1149 for (unsigned i = 0; i != NumElts; ++i) { 1150 EVT EltVT = ValueVTs[i]; 1151 if (isa<UndefValue>(C)) 1152 Constants[i] = DAG.getUNDEF(EltVT); 1153 else if (EltVT.isFloatingPoint()) 1154 Constants[i] = DAG.getConstantFP(0, EltVT); 1155 else 1156 Constants[i] = DAG.getConstant(0, EltVT); 1157 } 1158 1159 return DAG.getMergeValues(Constants, getCurSDLoc()); 1160 } 1161 1162 if (const BlockAddress *BA = dyn_cast<BlockAddress>(C)) 1163 return DAG.getBlockAddress(BA, VT); 1164 1165 VectorType *VecTy = cast<VectorType>(V->getType()); 1166 unsigned NumElements = VecTy->getNumElements(); 1167 1168 // Now that we know the number and type of the elements, get that number of 1169 // elements into the Ops array based on what kind of constant it is. 1170 SmallVector<SDValue, 16> Ops; 1171 if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) { 1172 for (unsigned i = 0; i != NumElements; ++i) 1173 Ops.push_back(getValue(CV->getOperand(i))); 1174 } else { 1175 assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!"); 1176 EVT EltVT = TLI.getValueType(VecTy->getElementType()); 1177 1178 SDValue Op; 1179 if (EltVT.isFloatingPoint()) 1180 Op = DAG.getConstantFP(0, EltVT); 1181 else 1182 Op = DAG.getConstant(0, EltVT); 1183 Ops.assign(NumElements, Op); 1184 } 1185 1186 // Create a BUILD_VECTOR node. 1187 return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops); 1188 } 1189 1190 // If this is a static alloca, generate it as the frameindex instead of 1191 // computation. 1192 if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) { 1193 DenseMap<const AllocaInst*, int>::iterator SI = 1194 FuncInfo.StaticAllocaMap.find(AI); 1195 if (SI != FuncInfo.StaticAllocaMap.end()) 1196 return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); 1197 } 1198 1199 // If this is an instruction which fast-isel has deferred, select it now. 1200 if (const Instruction *Inst = dyn_cast<Instruction>(V)) { 1201 unsigned InReg = FuncInfo.InitializeRegForValue(Inst); 1202 RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); 1203 SDValue Chain = DAG.getEntryNode(); 1204 return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V); 1205 } 1206 1207 llvm_unreachable("Can't get register for value!"); 1208 } 1209 1210 void SelectionDAGBuilder::visitRet(const ReturnInst &I) { 1211 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1212 SDValue Chain = getControlRoot(); 1213 SmallVector<ISD::OutputArg, 8> Outs; 1214 SmallVector<SDValue, 8> OutVals; 1215 1216 if (!FuncInfo.CanLowerReturn) { 1217 unsigned DemoteReg = FuncInfo.DemoteRegister; 1218 const Function *F = I.getParent()->getParent(); 1219 1220 // Emit a store of the return value through the virtual register. 1221 // Leave Outs empty so that LowerReturn won't try to load return 1222 // registers the usual way. 1223 SmallVector<EVT, 1> PtrValueVTs; 1224 ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), 1225 PtrValueVTs); 1226 1227 SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); 1228 SDValue RetOp = getValue(I.getOperand(0)); 1229 1230 SmallVector<EVT, 4> ValueVTs; 1231 SmallVector<uint64_t, 4> Offsets; 1232 ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); 1233 unsigned NumValues = ValueVTs.size(); 1234 1235 SmallVector<SDValue, 4> Chains(NumValues); 1236 for (unsigned i = 0; i != NumValues; ++i) { 1237 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), 1238 RetPtr.getValueType(), RetPtr, 1239 DAG.getIntPtrConstant(Offsets[i])); 1240 Chains[i] = 1241 DAG.getStore(Chain, getCurSDLoc(), 1242 SDValue(RetOp.getNode(), RetOp.getResNo() + i), 1243 // FIXME: better loc info would be nice. 1244 Add, MachinePointerInfo(), false, false, 0); 1245 } 1246 1247 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), 1248 MVT::Other, Chains); 1249 } else if (I.getNumOperands() != 0) { 1250 SmallVector<EVT, 4> ValueVTs; 1251 ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); 1252 unsigned NumValues = ValueVTs.size(); 1253 if (NumValues) { 1254 SDValue RetOp = getValue(I.getOperand(0)); 1255 1256 const Function *F = I.getParent()->getParent(); 1257 1258 ISD::NodeType ExtendKind = ISD::ANY_EXTEND; 1259 if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 1260 Attribute::SExt)) 1261 ExtendKind = ISD::SIGN_EXTEND; 1262 else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 1263 Attribute::ZExt)) 1264 ExtendKind = ISD::ZERO_EXTEND; 1265 1266 LLVMContext &Context = F->getContext(); 1267 bool RetInReg = F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, 1268 Attribute::InReg); 1269 1270 for (unsigned j = 0; j != NumValues; ++j) { 1271 EVT VT = ValueVTs[j]; 1272 1273 if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) 1274 VT = TLI.getTypeForExtArgOrReturn(Context, VT, ExtendKind); 1275 1276 unsigned NumParts = TLI.getNumRegisters(Context, VT); 1277 MVT PartVT = TLI.getRegisterType(Context, VT); 1278 SmallVector<SDValue, 4> Parts(NumParts); 1279 getCopyToParts(DAG, getCurSDLoc(), 1280 SDValue(RetOp.getNode(), RetOp.getResNo() + j), 1281 &Parts[0], NumParts, PartVT, &I, ExtendKind); 1282 1283 // 'inreg' on function refers to return value 1284 ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); 1285 if (RetInReg) 1286 Flags.setInReg(); 1287 1288 // Propagate extension type if any 1289 if (ExtendKind == ISD::SIGN_EXTEND) 1290 Flags.setSExt(); 1291 else if (ExtendKind == ISD::ZERO_EXTEND) 1292 Flags.setZExt(); 1293 1294 for (unsigned i = 0; i < NumParts; ++i) { 1295 Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), 1296 VT, /*isfixed=*/true, 0, 0)); 1297 OutVals.push_back(Parts[i]); 1298 } 1299 } 1300 } 1301 } 1302 1303 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 1304 CallingConv::ID CallConv = 1305 DAG.getMachineFunction().getFunction()->getCallingConv(); 1306 Chain = DAG.getTargetLoweringInfo().LowerReturn( 1307 Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG); 1308 1309 // Verify that the target's LowerReturn behaved as expected. 1310 assert(Chain.getNode() && Chain.getValueType() == MVT::Other && 1311 "LowerReturn didn't return a valid chain!"); 1312 1313 // Update the DAG with the new chain value resulting from return lowering. 1314 DAG.setRoot(Chain); 1315 } 1316 1317 /// CopyToExportRegsIfNeeded - If the given value has virtual registers 1318 /// created for it, emit nodes to copy the value into the virtual 1319 /// registers. 1320 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) { 1321 // Skip empty types 1322 if (V->getType()->isEmptyTy()) 1323 return; 1324 1325 DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); 1326 if (VMI != FuncInfo.ValueMap.end()) { 1327 assert(!V->use_empty() && "Unused value assigned virtual registers!"); 1328 CopyValueToVirtualRegister(V, VMI->second); 1329 } 1330 } 1331 1332 /// ExportFromCurrentBlock - If this condition isn't known to be exported from 1333 /// the current basic block, add it to ValueMap now so that we'll get a 1334 /// CopyTo/FromReg. 1335 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) { 1336 // No need to export constants. 1337 if (!isa<Instruction>(V) && !isa<Argument>(V)) return; 1338 1339 // Already exported? 1340 if (FuncInfo.isExportedInst(V)) return; 1341 1342 unsigned Reg = FuncInfo.InitializeRegForValue(V); 1343 CopyValueToVirtualRegister(V, Reg); 1344 } 1345 1346 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V, 1347 const BasicBlock *FromBB) { 1348 // The operands of the setcc have to be in this block. We don't know 1349 // how to export them from some other block. 1350 if (const Instruction *VI = dyn_cast<Instruction>(V)) { 1351 // Can export from current BB. 1352 if (VI->getParent() == FromBB) 1353 return true; 1354 1355 // Is already exported, noop. 1356 return FuncInfo.isExportedInst(V); 1357 } 1358 1359 // If this is an argument, we can export it if the BB is the entry block or 1360 // if it is already exported. 1361 if (isa<Argument>(V)) { 1362 if (FromBB == &FromBB->getParent()->getEntryBlock()) 1363 return true; 1364 1365 // Otherwise, can only export this if it is already exported. 1366 return FuncInfo.isExportedInst(V); 1367 } 1368 1369 // Otherwise, constants can always be exported. 1370 return true; 1371 } 1372 1373 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks. 1374 uint32_t SelectionDAGBuilder::getEdgeWeight(const MachineBasicBlock *Src, 1375 const MachineBasicBlock *Dst) const { 1376 BranchProbabilityInfo *BPI = FuncInfo.BPI; 1377 if (!BPI) 1378 return 0; 1379 const BasicBlock *SrcBB = Src->getBasicBlock(); 1380 const BasicBlock *DstBB = Dst->getBasicBlock(); 1381 return BPI->getEdgeWeight(SrcBB, DstBB); 1382 } 1383 1384 void SelectionDAGBuilder:: 1385 addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst, 1386 uint32_t Weight /* = 0 */) { 1387 if (!Weight) 1388 Weight = getEdgeWeight(Src, Dst); 1389 Src->addSuccessor(Dst, Weight); 1390 } 1391 1392 1393 static bool InBlock(const Value *V, const BasicBlock *BB) { 1394 if (const Instruction *I = dyn_cast<Instruction>(V)) 1395 return I->getParent() == BB; 1396 return true; 1397 } 1398 1399 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions. 1400 /// This function emits a branch and is used at the leaves of an OR or an 1401 /// AND operator tree. 1402 /// 1403 void 1404 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond, 1405 MachineBasicBlock *TBB, 1406 MachineBasicBlock *FBB, 1407 MachineBasicBlock *CurBB, 1408 MachineBasicBlock *SwitchBB, 1409 uint32_t TWeight, 1410 uint32_t FWeight) { 1411 const BasicBlock *BB = CurBB->getBasicBlock(); 1412 1413 // If the leaf of the tree is a comparison, merge the condition into 1414 // the caseblock. 1415 if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) { 1416 // The operands of the cmp have to be in this block. We don't know 1417 // how to export them from some other block. If this is the first block 1418 // of the sequence, no exporting is needed. 1419 if (CurBB == SwitchBB || 1420 (isExportableFromCurrentBlock(BOp->getOperand(0), BB) && 1421 isExportableFromCurrentBlock(BOp->getOperand(1), BB))) { 1422 ISD::CondCode Condition; 1423 if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) { 1424 Condition = getICmpCondCode(IC->getPredicate()); 1425 } else if (const FCmpInst *FC = dyn_cast<FCmpInst>(Cond)) { 1426 Condition = getFCmpCondCode(FC->getPredicate()); 1427 if (TM.Options.NoNaNsFPMath) 1428 Condition = getFCmpCodeWithoutNaN(Condition); 1429 } else { 1430 (void)Condition; // silence warning. 1431 llvm_unreachable("Unknown compare instruction"); 1432 } 1433 1434 CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr, 1435 TBB, FBB, CurBB, TWeight, FWeight); 1436 SwitchCases.push_back(CB); 1437 return; 1438 } 1439 } 1440 1441 // Create a CaseBlock record representing this branch. 1442 CaseBlock CB(ISD::SETEQ, Cond, ConstantInt::getTrue(*DAG.getContext()), 1443 nullptr, TBB, FBB, CurBB, TWeight, FWeight); 1444 SwitchCases.push_back(CB); 1445 } 1446 1447 /// Scale down both weights to fit into uint32_t. 1448 static void ScaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) { 1449 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse; 1450 uint32_t Scale = (NewMax / UINT32_MAX) + 1; 1451 NewTrue = NewTrue / Scale; 1452 NewFalse = NewFalse / Scale; 1453 } 1454 1455 /// FindMergedConditions - If Cond is an expression like 1456 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, 1457 MachineBasicBlock *TBB, 1458 MachineBasicBlock *FBB, 1459 MachineBasicBlock *CurBB, 1460 MachineBasicBlock *SwitchBB, 1461 unsigned Opc, uint32_t TWeight, 1462 uint32_t FWeight) { 1463 // If this node is not part of the or/and tree, emit it as a branch. 1464 const Instruction *BOp = dyn_cast<Instruction>(Cond); 1465 if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) || 1466 (unsigned)BOp->getOpcode() != Opc || !BOp->hasOneUse() || 1467 BOp->getParent() != CurBB->getBasicBlock() || 1468 !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) || 1469 !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) { 1470 EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, 1471 TWeight, FWeight); 1472 return; 1473 } 1474 1475 // Create TmpBB after CurBB. 1476 MachineFunction::iterator BBI = CurBB; 1477 MachineFunction &MF = DAG.getMachineFunction(); 1478 MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock()); 1479 CurBB->getParent()->insert(++BBI, TmpBB); 1480 1481 if (Opc == Instruction::Or) { 1482 // Codegen X | Y as: 1483 // BB1: 1484 // jmp_if_X TBB 1485 // jmp TmpBB 1486 // TmpBB: 1487 // jmp_if_Y TBB 1488 // jmp FBB 1489 // 1490 1491 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 1492 // The requirement is that 1493 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) 1494 // = TrueProb for orignal BB. 1495 // Assuming the orignal weights are A and B, one choice is to set BB1's 1496 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice 1497 // assumes that 1498 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. 1499 // Another choice is to assume TrueProb for BB1 equals to TrueProb for 1500 // TmpBB, but the math is more complicated. 1501 1502 uint64_t NewTrueWeight = TWeight; 1503 uint64_t NewFalseWeight = (uint64_t)TWeight + 2 * (uint64_t)FWeight; 1504 ScaleWeights(NewTrueWeight, NewFalseWeight); 1505 // Emit the LHS condition. 1506 FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc, 1507 NewTrueWeight, NewFalseWeight); 1508 1509 NewTrueWeight = TWeight; 1510 NewFalseWeight = 2 * (uint64_t)FWeight; 1511 ScaleWeights(NewTrueWeight, NewFalseWeight); 1512 // Emit the RHS condition into TmpBB. 1513 FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, 1514 NewTrueWeight, NewFalseWeight); 1515 } else { 1516 assert(Opc == Instruction::And && "Unknown merge op!"); 1517 // Codegen X & Y as: 1518 // BB1: 1519 // jmp_if_X TmpBB 1520 // jmp FBB 1521 // TmpBB: 1522 // jmp_if_Y TBB 1523 // jmp FBB 1524 // 1525 // This requires creation of TmpBB after CurBB. 1526 1527 // We have flexibility in setting Prob for BB1 and Prob for TmpBB. 1528 // The requirement is that 1529 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) 1530 // = FalseProb for orignal BB. 1531 // Assuming the orignal weights are A and B, one choice is to set BB1's 1532 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice 1533 // assumes that 1534 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. 1535 1536 uint64_t NewTrueWeight = 2 * (uint64_t)TWeight + (uint64_t)FWeight; 1537 uint64_t NewFalseWeight = FWeight; 1538 ScaleWeights(NewTrueWeight, NewFalseWeight); 1539 // Emit the LHS condition. 1540 FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc, 1541 NewTrueWeight, NewFalseWeight); 1542 1543 NewTrueWeight = 2 * (uint64_t)TWeight; 1544 NewFalseWeight = FWeight; 1545 ScaleWeights(NewTrueWeight, NewFalseWeight); 1546 // Emit the RHS condition into TmpBB. 1547 FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc, 1548 NewTrueWeight, NewFalseWeight); 1549 } 1550 } 1551 1552 /// If the set of cases should be emitted as a series of branches, return true. 1553 /// If we should emit this as a bunch of and/or'd together conditions, return 1554 /// false. 1555 bool 1556 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) { 1557 if (Cases.size() != 2) return true; 1558 1559 // If this is two comparisons of the same values or'd or and'd together, they 1560 // will get folded into a single comparison, so don't emit two blocks. 1561 if ((Cases[0].CmpLHS == Cases[1].CmpLHS && 1562 Cases[0].CmpRHS == Cases[1].CmpRHS) || 1563 (Cases[0].CmpRHS == Cases[1].CmpLHS && 1564 Cases[0].CmpLHS == Cases[1].CmpRHS)) { 1565 return false; 1566 } 1567 1568 // Handle: (X != null) | (Y != null) --> (X|Y) != 0 1569 // Handle: (X == null) & (Y == null) --> (X|Y) == 0 1570 if (Cases[0].CmpRHS == Cases[1].CmpRHS && 1571 Cases[0].CC == Cases[1].CC && 1572 isa<Constant>(Cases[0].CmpRHS) && 1573 cast<Constant>(Cases[0].CmpRHS)->isNullValue()) { 1574 if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB) 1575 return false; 1576 if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB) 1577 return false; 1578 } 1579 1580 return true; 1581 } 1582 1583 void SelectionDAGBuilder::visitBr(const BranchInst &I) { 1584 MachineBasicBlock *BrMBB = FuncInfo.MBB; 1585 1586 // Update machine-CFG edges. 1587 MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; 1588 1589 if (I.isUnconditional()) { 1590 // Update machine-CFG edges. 1591 BrMBB->addSuccessor(Succ0MBB); 1592 1593 // If this is not a fall-through branch or optimizations are switched off, 1594 // emit the branch. 1595 if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None) 1596 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), 1597 MVT::Other, getControlRoot(), 1598 DAG.getBasicBlock(Succ0MBB))); 1599 1600 return; 1601 } 1602 1603 // If this condition is one of the special cases we handle, do special stuff 1604 // now. 1605 const Value *CondVal = I.getCondition(); 1606 MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)]; 1607 1608 // If this is a series of conditions that are or'd or and'd together, emit 1609 // this as a sequence of branches instead of setcc's with and/or operations. 1610 // As long as jumps are not expensive, this should improve performance. 1611 // For example, instead of something like: 1612 // cmp A, B 1613 // C = seteq 1614 // cmp D, E 1615 // F = setle 1616 // or C, F 1617 // jnz foo 1618 // Emit: 1619 // cmp A, B 1620 // je foo 1621 // cmp D, E 1622 // jle foo 1623 // 1624 if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) { 1625 if (!DAG.getTargetLoweringInfo().isJumpExpensive() && 1626 BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || 1627 BOp->getOpcode() == Instruction::Or)) { 1628 FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB, 1629 BOp->getOpcode(), getEdgeWeight(BrMBB, Succ0MBB), 1630 getEdgeWeight(BrMBB, Succ1MBB)); 1631 // If the compares in later blocks need to use values not currently 1632 // exported from this block, export them now. This block should always 1633 // be the first entry. 1634 assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!"); 1635 1636 // Allow some cases to be rejected. 1637 if (ShouldEmitAsBranches(SwitchCases)) { 1638 for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) { 1639 ExportFromCurrentBlock(SwitchCases[i].CmpLHS); 1640 ExportFromCurrentBlock(SwitchCases[i].CmpRHS); 1641 } 1642 1643 // Emit the branch for this block. 1644 visitSwitchCase(SwitchCases[0], BrMBB); 1645 SwitchCases.erase(SwitchCases.begin()); 1646 return; 1647 } 1648 1649 // Okay, we decided not to do this, remove any inserted MBB's and clear 1650 // SwitchCases. 1651 for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) 1652 FuncInfo.MF->erase(SwitchCases[i].ThisBB); 1653 1654 SwitchCases.clear(); 1655 } 1656 } 1657 1658 // Create a CaseBlock record representing this branch. 1659 CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()), 1660 nullptr, Succ0MBB, Succ1MBB, BrMBB); 1661 1662 // Use visitSwitchCase to actually insert the fast branch sequence for this 1663 // cond branch. 1664 visitSwitchCase(CB, BrMBB); 1665 } 1666 1667 /// visitSwitchCase - Emits the necessary code to represent a single node in 1668 /// the binary search tree resulting from lowering a switch instruction. 1669 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, 1670 MachineBasicBlock *SwitchBB) { 1671 SDValue Cond; 1672 SDValue CondLHS = getValue(CB.CmpLHS); 1673 SDLoc dl = getCurSDLoc(); 1674 1675 // Build the setcc now. 1676 if (!CB.CmpMHS) { 1677 // Fold "(X == true)" to X and "(X == false)" to !X to 1678 // handle common cases produced by branch lowering. 1679 if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) && 1680 CB.CC == ISD::SETEQ) 1681 Cond = CondLHS; 1682 else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) && 1683 CB.CC == ISD::SETEQ) { 1684 SDValue True = DAG.getConstant(1, CondLHS.getValueType()); 1685 Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True); 1686 } else 1687 Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC); 1688 } else { 1689 assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now"); 1690 1691 const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue(); 1692 const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue(); 1693 1694 SDValue CmpOp = getValue(CB.CmpMHS); 1695 EVT VT = CmpOp.getValueType(); 1696 1697 if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) { 1698 Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT), 1699 ISD::SETLE); 1700 } else { 1701 SDValue SUB = DAG.getNode(ISD::SUB, dl, 1702 VT, CmpOp, DAG.getConstant(Low, VT)); 1703 Cond = DAG.getSetCC(dl, MVT::i1, SUB, 1704 DAG.getConstant(High-Low, VT), ISD::SETULE); 1705 } 1706 } 1707 1708 // Update successor info 1709 addSuccessorWithWeight(SwitchBB, CB.TrueBB, CB.TrueWeight); 1710 // TrueBB and FalseBB are always different unless the incoming IR is 1711 // degenerate. This only happens when running llc on weird IR. 1712 if (CB.TrueBB != CB.FalseBB) 1713 addSuccessorWithWeight(SwitchBB, CB.FalseBB, CB.FalseWeight); 1714 1715 // If the lhs block is the next block, invert the condition so that we can 1716 // fall through to the lhs instead of the rhs block. 1717 if (CB.TrueBB == NextBlock(SwitchBB)) { 1718 std::swap(CB.TrueBB, CB.FalseBB); 1719 SDValue True = DAG.getConstant(1, Cond.getValueType()); 1720 Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True); 1721 } 1722 1723 SDValue BrCond = DAG.getNode(ISD::BRCOND, dl, 1724 MVT::Other, getControlRoot(), Cond, 1725 DAG.getBasicBlock(CB.TrueBB)); 1726 1727 // Insert the false branch. Do this even if it's a fall through branch, 1728 // this makes it easier to do DAG optimizations which require inverting 1729 // the branch condition. 1730 BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, 1731 DAG.getBasicBlock(CB.FalseBB)); 1732 1733 DAG.setRoot(BrCond); 1734 } 1735 1736 /// visitJumpTable - Emit JumpTable node in the current MBB 1737 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) { 1738 // Emit the code for the jump table 1739 assert(JT.Reg != -1U && "Should lower JT Header first!"); 1740 EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(); 1741 SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), 1742 JT.Reg, PTy); 1743 SDValue Table = DAG.getJumpTable(JT.JTI, PTy); 1744 SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(), 1745 MVT::Other, Index.getValue(1), 1746 Table, Index); 1747 DAG.setRoot(BrJumpTable); 1748 } 1749 1750 /// visitJumpTableHeader - This function emits necessary code to produce index 1751 /// in the JumpTable from switch case. 1752 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, 1753 JumpTableHeader &JTH, 1754 MachineBasicBlock *SwitchBB) { 1755 // Subtract the lowest switch case value from the value being switched on and 1756 // conditional branch to default mbb if the result is greater than the 1757 // difference between smallest and largest cases. 1758 SDValue SwitchOp = getValue(JTH.SValue); 1759 EVT VT = SwitchOp.getValueType(); 1760 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, 1761 DAG.getConstant(JTH.First, VT)); 1762 1763 // The SDNode we just created, which holds the value being switched on minus 1764 // the smallest case value, needs to be copied to a virtual register so it 1765 // can be used as an index into the jump table in a subsequent basic block. 1766 // This value may be smaller or larger than the target's pointer type, and 1767 // therefore require extension or truncating. 1768 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1769 SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI.getPointerTy()); 1770 1771 unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); 1772 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), 1773 JumpTableReg, SwitchOp); 1774 JT.Reg = JumpTableReg; 1775 1776 // Emit the range check for the jump table, and branch to the default block 1777 // for the switch statement if the value being switched on exceeds the largest 1778 // case in the switch. 1779 SDValue CMP = 1780 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), 1781 Sub.getValueType()), 1782 Sub, DAG.getConstant(JTH.Last - JTH.First, VT), ISD::SETUGT); 1783 1784 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1785 MVT::Other, CopyTo, CMP, 1786 DAG.getBasicBlock(JT.Default)); 1787 1788 // Avoid emitting unnecessary branches to the next block. 1789 if (JT.MBB != NextBlock(SwitchBB)) 1790 BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, 1791 DAG.getBasicBlock(JT.MBB)); 1792 1793 DAG.setRoot(BrCond); 1794 } 1795 1796 /// Codegen a new tail for a stack protector check ParentMBB which has had its 1797 /// tail spliced into a stack protector check success bb. 1798 /// 1799 /// For a high level explanation of how this fits into the stack protector 1800 /// generation see the comment on the declaration of class 1801 /// StackProtectorDescriptor. 1802 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, 1803 MachineBasicBlock *ParentBB) { 1804 1805 // First create the loads to the guard/stack slot for the comparison. 1806 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1807 EVT PtrTy = TLI.getPointerTy(); 1808 1809 MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); 1810 int FI = MFI->getStackProtectorIndex(); 1811 1812 const Value *IRGuard = SPD.getGuard(); 1813 SDValue GuardPtr = getValue(IRGuard); 1814 SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); 1815 1816 unsigned Align = 1817 TLI.getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); 1818 1819 SDValue Guard; 1820 1821 // If GuardReg is set and useLoadStackGuardNode returns true, retrieve the 1822 // guard value from the virtual register holding the value. Otherwise, emit a 1823 // volatile load to retrieve the stack guard value. 1824 unsigned GuardReg = SPD.getGuardReg(); 1825 1826 if (GuardReg && TLI.useLoadStackGuardNode()) 1827 Guard = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), GuardReg, 1828 PtrTy); 1829 else 1830 Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), 1831 GuardPtr, MachinePointerInfo(IRGuard, 0), 1832 true, false, false, Align); 1833 1834 SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), 1835 StackSlotPtr, 1836 MachinePointerInfo::getFixedStack(FI), 1837 true, false, false, Align); 1838 1839 // Perform the comparison via a subtract/getsetcc. 1840 EVT VT = Guard.getValueType(); 1841 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); 1842 1843 SDValue Cmp = 1844 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), 1845 Sub.getValueType()), 1846 Sub, DAG.getConstant(0, VT), ISD::SETNE); 1847 1848 // If the sub is not 0, then we know the guard/stackslot do not equal, so 1849 // branch to failure MBB. 1850 SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1851 MVT::Other, StackSlot.getOperand(0), 1852 Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); 1853 // Otherwise branch to success MBB. 1854 SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), 1855 MVT::Other, BrCond, 1856 DAG.getBasicBlock(SPD.getSuccessMBB())); 1857 1858 DAG.setRoot(Br); 1859 } 1860 1861 /// Codegen the failure basic block for a stack protector check. 1862 /// 1863 /// A failure stack protector machine basic block consists simply of a call to 1864 /// __stack_chk_fail(). 1865 /// 1866 /// For a high level explanation of how this fits into the stack protector 1867 /// generation see the comment on the declaration of class 1868 /// StackProtectorDescriptor. 1869 void 1870 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) { 1871 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1872 SDValue Chain = 1873 TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid, 1874 nullptr, 0, false, getCurSDLoc(), false, false).second; 1875 DAG.setRoot(Chain); 1876 } 1877 1878 /// visitBitTestHeader - This function emits necessary code to produce value 1879 /// suitable for "bit tests" 1880 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, 1881 MachineBasicBlock *SwitchBB) { 1882 // Subtract the minimum value 1883 SDValue SwitchOp = getValue(B.SValue); 1884 EVT VT = SwitchOp.getValueType(); 1885 SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp, 1886 DAG.getConstant(B.First, VT)); 1887 1888 // Check range 1889 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1890 SDValue RangeCmp = 1891 DAG.getSetCC(getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), 1892 Sub.getValueType()), 1893 Sub, DAG.getConstant(B.Range, VT), ISD::SETUGT); 1894 1895 // Determine the type of the test operands. 1896 bool UsePtrType = false; 1897 if (!TLI.isTypeLegal(VT)) 1898 UsePtrType = true; 1899 else { 1900 for (unsigned i = 0, e = B.Cases.size(); i != e; ++i) 1901 if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) { 1902 // Switch table case range are encoded into series of masks. 1903 // Just use pointer type, it's guaranteed to fit. 1904 UsePtrType = true; 1905 break; 1906 } 1907 } 1908 if (UsePtrType) { 1909 VT = TLI.getPointerTy(); 1910 Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT); 1911 } 1912 1913 B.RegVT = VT.getSimpleVT(); 1914 B.Reg = FuncInfo.CreateReg(B.RegVT); 1915 SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(), 1916 B.Reg, Sub); 1917 1918 MachineBasicBlock* MBB = B.Cases[0].ThisBB; 1919 1920 addSuccessorWithWeight(SwitchBB, B.Default); 1921 addSuccessorWithWeight(SwitchBB, MBB); 1922 1923 SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1924 MVT::Other, CopyTo, RangeCmp, 1925 DAG.getBasicBlock(B.Default)); 1926 1927 // Avoid emitting unnecessary branches to the next block. 1928 if (MBB != NextBlock(SwitchBB)) 1929 BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo, 1930 DAG.getBasicBlock(MBB)); 1931 1932 DAG.setRoot(BrRange); 1933 } 1934 1935 /// visitBitTestCase - this function produces one "bit test" 1936 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, 1937 MachineBasicBlock* NextMBB, 1938 uint32_t BranchWeightToNext, 1939 unsigned Reg, 1940 BitTestCase &B, 1941 MachineBasicBlock *SwitchBB) { 1942 MVT VT = BB.RegVT; 1943 SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(), 1944 Reg, VT); 1945 SDValue Cmp; 1946 unsigned PopCount = countPopulation(B.Mask); 1947 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1948 if (PopCount == 1) { 1949 // Testing for a single bit; just compare the shift count with what it 1950 // would need to be to shift a 1 bit in that position. 1951 Cmp = DAG.getSetCC( 1952 getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, 1953 DAG.getConstant(countTrailingZeros(B.Mask), VT), ISD::SETEQ); 1954 } else if (PopCount == BB.Range) { 1955 // There is only one zero bit in the range, test for it directly. 1956 Cmp = DAG.getSetCC( 1957 getCurSDLoc(), TLI.getSetCCResultType(*DAG.getContext(), VT), ShiftOp, 1958 DAG.getConstant(countTrailingOnes(B.Mask), VT), ISD::SETNE); 1959 } else { 1960 // Make desired shift 1961 SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, 1962 DAG.getConstant(1, VT), ShiftOp); 1963 1964 // Emit bit tests and jumps 1965 SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), 1966 VT, SwitchVal, DAG.getConstant(B.Mask, VT)); 1967 Cmp = DAG.getSetCC(getCurSDLoc(), 1968 TLI.getSetCCResultType(*DAG.getContext(), VT), AndOp, 1969 DAG.getConstant(0, VT), ISD::SETNE); 1970 } 1971 1972 // The branch weight from SwitchBB to B.TargetBB is B.ExtraWeight. 1973 addSuccessorWithWeight(SwitchBB, B.TargetBB, B.ExtraWeight); 1974 // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. 1975 addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); 1976 1977 SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), 1978 MVT::Other, getControlRoot(), 1979 Cmp, DAG.getBasicBlock(B.TargetBB)); 1980 1981 // Avoid emitting unnecessary branches to the next block. 1982 if (NextMBB != NextBlock(SwitchBB)) 1983 BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, 1984 DAG.getBasicBlock(NextMBB)); 1985 1986 DAG.setRoot(BrAnd); 1987 } 1988 1989 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { 1990 MachineBasicBlock *InvokeMBB = FuncInfo.MBB; 1991 1992 // Retrieve successors. 1993 MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; 1994 MachineBasicBlock *LandingPad = FuncInfo.MBBMap[I.getSuccessor(1)]; 1995 1996 const Value *Callee(I.getCalledValue()); 1997 const Function *Fn = dyn_cast<Function>(Callee); 1998 if (isa<InlineAsm>(Callee)) 1999 visitInlineAsm(&I); 2000 else if (Fn && Fn->isIntrinsic()) { 2001 switch (Fn->getIntrinsicID()) { 2002 default: 2003 llvm_unreachable("Cannot invoke this intrinsic"); 2004 case Intrinsic::donothing: 2005 // Ignore invokes to @llvm.donothing: jump directly to the next BB. 2006 break; 2007 case Intrinsic::experimental_patchpoint_void: 2008 case Intrinsic::experimental_patchpoint_i64: 2009 visitPatchpoint(&I, LandingPad); 2010 break; 2011 case Intrinsic::experimental_gc_statepoint: 2012 LowerStatepoint(ImmutableStatepoint(&I), LandingPad); 2013 break; 2014 } 2015 } else 2016 LowerCallTo(&I, getValue(Callee), false, LandingPad); 2017 2018 // If the value of the invoke is used outside of its defining block, make it 2019 // available as a virtual register. 2020 // We already took care of the exported value for the statepoint instruction 2021 // during call to the LowerStatepoint. 2022 if (!isStatepoint(I)) { 2023 CopyToExportRegsIfNeeded(&I); 2024 } 2025 2026 // Update successor info 2027 addSuccessorWithWeight(InvokeMBB, Return); 2028 addSuccessorWithWeight(InvokeMBB, LandingPad); 2029 2030 // Drop into normal successor. 2031 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), 2032 MVT::Other, getControlRoot(), 2033 DAG.getBasicBlock(Return))); 2034 } 2035 2036 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) { 2037 llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!"); 2038 } 2039 2040 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { 2041 assert(FuncInfo.MBB->isLandingPad() && 2042 "Call to landingpad not in landing pad!"); 2043 2044 MachineBasicBlock *MBB = FuncInfo.MBB; 2045 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 2046 AddLandingPadInfo(LP, MMI, MBB); 2047 2048 // If there aren't registers to copy the values into (e.g., during SjLj 2049 // exceptions), then don't bother to create these DAG nodes. 2050 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2051 if (TLI.getExceptionPointerRegister() == 0 && 2052 TLI.getExceptionSelectorRegister() == 0) 2053 return; 2054 2055 SmallVector<EVT, 2> ValueVTs; 2056 ComputeValueVTs(TLI, LP.getType(), ValueVTs); 2057 assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); 2058 2059 // Get the two live-in registers as SDValues. The physregs have already been 2060 // copied into virtual registers. 2061 SDValue Ops[2]; 2062 if (FuncInfo.ExceptionPointerVirtReg) { 2063 Ops[0] = DAG.getZExtOrTrunc( 2064 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), 2065 FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), 2066 getCurSDLoc(), ValueVTs[0]); 2067 } else { 2068 Ops[0] = DAG.getConstant(0, TLI.getPointerTy()); 2069 } 2070 Ops[1] = DAG.getZExtOrTrunc( 2071 DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), 2072 FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), 2073 getCurSDLoc(), ValueVTs[1]); 2074 2075 // Merge into one. 2076 SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 2077 DAG.getVTList(ValueVTs), Ops); 2078 setValue(&LP, Res); 2079 } 2080 2081 unsigned 2082 SelectionDAGBuilder::visitLandingPadClauseBB(GlobalValue *ClauseGV, 2083 MachineBasicBlock *LPadBB) { 2084 SDValue Chain = getControlRoot(); 2085 2086 // Get the typeid that we will dispatch on later. 2087 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2088 const TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy()); 2089 unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC); 2090 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(ClauseGV); 2091 SDValue Sel = DAG.getConstant(TypeID, TLI.getPointerTy()); 2092 Chain = DAG.getCopyToReg(Chain, getCurSDLoc(), VReg, Sel); 2093 2094 // Branch to the main landing pad block. 2095 MachineBasicBlock *ClauseMBB = FuncInfo.MBB; 2096 ClauseMBB->addSuccessor(LPadBB); 2097 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, Chain, 2098 DAG.getBasicBlock(LPadBB))); 2099 return VReg; 2100 } 2101 2102 /// handleSmallSwitchCaseRange - Emit a series of specific tests (suitable for 2103 /// small case ranges). 2104 bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, 2105 CaseRecVector& WorkList, 2106 const Value* SV, 2107 MachineBasicBlock *Default, 2108 MachineBasicBlock *SwitchBB) { 2109 // Size is the number of Cases represented by this range. 2110 size_t Size = CR.Range.second - CR.Range.first; 2111 if (Size > 3) 2112 return false; 2113 2114 // Get the MachineFunction which holds the current MBB. This is used when 2115 // inserting any additional MBBs necessary to represent the switch. 2116 MachineFunction *CurMF = FuncInfo.MF; 2117 2118 // Figure out which block is immediately after the current one. 2119 MachineBasicBlock *NextMBB = nullptr; 2120 MachineFunction::iterator BBI = CR.CaseBB; 2121 if (++BBI != FuncInfo.MF->end()) 2122 NextMBB = BBI; 2123 2124 BranchProbabilityInfo *BPI = FuncInfo.BPI; 2125 // If any two of the cases has the same destination, and if one value 2126 // is the same as the other, but has one bit unset that the other has set, 2127 // use bit manipulation to do two compares at once. For example: 2128 // "if (X == 6 || X == 4)" -> "if ((X|2) == 6)" 2129 // TODO: This could be extended to merge any 2 cases in switches with 3 cases. 2130 // TODO: Handle cases where CR.CaseBB != SwitchBB. 2131 if (Size == 2 && CR.CaseBB == SwitchBB) { 2132 Case &Small = *CR.Range.first; 2133 Case &Big = *(CR.Range.second-1); 2134 2135 if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) { 2136 const APInt& SmallValue = Small.Low->getValue(); 2137 const APInt& BigValue = Big.Low->getValue(); 2138 2139 // Check that there is only one bit different. 2140 if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 && 2141 (SmallValue | BigValue) == BigValue) { 2142 // Isolate the common bit. 2143 APInt CommonBit = BigValue & ~SmallValue; 2144 assert((SmallValue | CommonBit) == BigValue && 2145 CommonBit.countPopulation() == 1 && "Not a common bit?"); 2146 2147 SDValue CondLHS = getValue(SV); 2148 EVT VT = CondLHS.getValueType(); 2149 SDLoc DL = getCurSDLoc(); 2150 2151 SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, 2152 DAG.getConstant(CommonBit, VT)); 2153 SDValue Cond = DAG.getSetCC(DL, MVT::i1, 2154 Or, DAG.getConstant(BigValue, VT), 2155 ISD::SETEQ); 2156 2157 // Update successor info. 2158 // Both Small and Big will jump to Small.BB, so we sum up the weights. 2159 addSuccessorWithWeight(SwitchBB, Small.BB, 2160 Small.ExtraWeight + Big.ExtraWeight); 2161 addSuccessorWithWeight(SwitchBB, Default, 2162 // The default destination is the first successor in IR. 2163 BPI ? BPI->getEdgeWeight(SwitchBB->getBasicBlock(), (unsigned)0) : 0); 2164 2165 // Insert the true branch. 2166 SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other, 2167 getControlRoot(), Cond, 2168 DAG.getBasicBlock(Small.BB)); 2169 2170 // Insert the false branch. 2171 BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond, 2172 DAG.getBasicBlock(Default)); 2173 2174 DAG.setRoot(BrCond); 2175 return true; 2176 } 2177 } 2178 } 2179 2180 // Order cases by weight so the most likely case will be checked first. 2181 uint32_t UnhandledWeights = 0; 2182 if (BPI) { 2183 for (CaseItr I = CR.Range.first, IE = CR.Range.second; I != IE; ++I) { 2184 uint32_t IWeight = I->ExtraWeight; 2185 UnhandledWeights += IWeight; 2186 for (CaseItr J = CR.Range.first; J < I; ++J) { 2187 uint32_t JWeight = J->ExtraWeight; 2188 if (IWeight > JWeight) 2189 std::swap(*I, *J); 2190 } 2191 } 2192 } 2193 // Rearrange the case blocks so that the last one falls through if possible. 2194 Case &BackCase = *(CR.Range.second-1); 2195 if (Size > 1 && NextMBB && Default != NextMBB && BackCase.BB != NextMBB) { 2196 // The last case block won't fall through into 'NextMBB' if we emit the 2197 // branches in this order. See if rearranging a case value would help. 2198 // We start at the bottom as it's the case with the least weight. 2199 for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) 2200 if (I->BB == NextMBB) { 2201 std::swap(*I, BackCase); 2202 break; 2203 } 2204 } 2205 2206 // Create a CaseBlock record representing a conditional branch to 2207 // the Case's target mbb if the value being switched on SV is equal 2208 // to C. 2209 MachineBasicBlock *CurBlock = CR.CaseBB; 2210 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { 2211 MachineBasicBlock *FallThrough; 2212 if (I != E-1) { 2213 FallThrough = CurMF->CreateMachineBasicBlock(CurBlock->getBasicBlock()); 2214 CurMF->insert(BBI, FallThrough); 2215 2216 // Put SV in a virtual register to make it available from the new blocks. 2217 ExportFromCurrentBlock(SV); 2218 } else { 2219 // If the last case doesn't match, go to the default block. 2220 FallThrough = Default; 2221 } 2222 2223 const Value *RHS, *LHS, *MHS; 2224 ISD::CondCode CC; 2225 if (I->High == I->Low) { 2226 // This is just small small case range :) containing exactly 1 case 2227 CC = ISD::SETEQ; 2228 LHS = SV; RHS = I->High; MHS = nullptr; 2229 } else { 2230 CC = ISD::SETLE; 2231 LHS = I->Low; MHS = SV; RHS = I->High; 2232 } 2233 2234 // The false weight should be sum of all un-handled cases. 2235 UnhandledWeights -= I->ExtraWeight; 2236 CaseBlock CB(CC, LHS, RHS, MHS, /* truebb */ I->BB, /* falsebb */ FallThrough, 2237 /* me */ CurBlock, 2238 /* trueweight */ I->ExtraWeight, 2239 /* falseweight */ UnhandledWeights); 2240 2241 // If emitting the first comparison, just call visitSwitchCase to emit the 2242 // code into the current block. Otherwise, push the CaseBlock onto the 2243 // vector to be later processed by SDISel, and insert the node's MBB 2244 // before the next MBB. 2245 if (CurBlock == SwitchBB) 2246 visitSwitchCase(CB, SwitchBB); 2247 else 2248 SwitchCases.push_back(CB); 2249 2250 CurBlock = FallThrough; 2251 } 2252 2253 return true; 2254 } 2255 2256 static inline bool areJTsAllowed(const TargetLowering &TLI) { 2257 return TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || 2258 TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other); 2259 } 2260 2261 static APInt ComputeRange(const APInt &First, const APInt &Last) { 2262 uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; 2263 APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); 2264 return (LastExt - FirstExt + 1ULL); 2265 } 2266 2267 /// handleJTSwitchCase - Emit jumptable for current switch case range 2268 bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, 2269 CaseRecVector &WorkList, 2270 const Value *SV, 2271 MachineBasicBlock *Default, 2272 MachineBasicBlock *SwitchBB) { 2273 Case& FrontCase = *CR.Range.first; 2274 Case& BackCase = *(CR.Range.second-1); 2275 2276 const APInt &First = FrontCase.Low->getValue(); 2277 const APInt &Last = BackCase.High->getValue(); 2278 2279 APInt TSize(First.getBitWidth(), 0); 2280 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) 2281 TSize += I->size(); 2282 2283 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2284 if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) 2285 return false; 2286 2287 APInt Range = ComputeRange(First, Last); 2288 // The density is TSize / Range. Require at least 40%. 2289 // It should not be possible for IntTSize to saturate for sane code, but make 2290 // sure we handle Range saturation correctly. 2291 uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10); 2292 uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10); 2293 if (IntTSize * 10 < IntRange * 4) 2294 return false; 2295 2296 DEBUG(dbgs() << "Lowering jump table\n" 2297 << "First entry: " << First << ". Last entry: " << Last << '\n' 2298 << "Range: " << Range << ". Size: " << TSize << ".\n\n"); 2299 2300 // Get the MachineFunction which holds the current MBB. This is used when 2301 // inserting any additional MBBs necessary to represent the switch. 2302 MachineFunction *CurMF = FuncInfo.MF; 2303 2304 // Figure out which block is immediately after the current one. 2305 MachineFunction::iterator BBI = CR.CaseBB; 2306 ++BBI; 2307 2308 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); 2309 2310 // Create a new basic block to hold the code for loading the address 2311 // of the jump table, and jumping to it. Update successor information; 2312 // we will either branch to the default case for the switch, or the jump 2313 // table. 2314 MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB); 2315 CurMF->insert(BBI, JumpTableBB); 2316 2317 addSuccessorWithWeight(CR.CaseBB, Default); 2318 addSuccessorWithWeight(CR.CaseBB, JumpTableBB); 2319 2320 // Build a vector of destination BBs, corresponding to each target 2321 // of the jump table. If the value of the jump table slot corresponds to 2322 // a case statement, push the case's BB onto the vector, otherwise, push 2323 // the default BB. 2324 std::vector<MachineBasicBlock*> DestBBs; 2325 APInt TEI = First; 2326 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++TEI) { 2327 const APInt &Low = I->Low->getValue(); 2328 const APInt &High = I->High->getValue(); 2329 2330 if (Low.sle(TEI) && TEI.sle(High)) { 2331 DestBBs.push_back(I->BB); 2332 if (TEI==High) 2333 ++I; 2334 } else { 2335 DestBBs.push_back(Default); 2336 } 2337 } 2338 2339 // Calculate weight for each unique destination in CR. 2340 DenseMap<MachineBasicBlock*, uint32_t> DestWeights; 2341 if (FuncInfo.BPI) { 2342 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) 2343 DestWeights[I->BB] += I->ExtraWeight; 2344 } 2345 2346 // Update successor info. Add one edge to each unique successor. 2347 BitVector SuccsHandled(CR.CaseBB->getParent()->getNumBlockIDs()); 2348 for (MachineBasicBlock *DestBB : DestBBs) { 2349 if (!SuccsHandled[DestBB->getNumber()]) { 2350 SuccsHandled[DestBB->getNumber()] = true; 2351 auto I = DestWeights.find(DestBB); 2352 addSuccessorWithWeight(JumpTableBB, DestBB, 2353 I != DestWeights.end() ? I->second : 0); 2354 } 2355 } 2356 2357 // Create a jump table index for this jump table. 2358 unsigned JTEncoding = TLI.getJumpTableEncoding(); 2359 unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) 2360 ->createJumpTableIndex(DestBBs); 2361 2362 // Set the jump table information so that we can codegen it as a second 2363 // MachineBasicBlock 2364 JumpTable JT(-1U, JTI, JumpTableBB, Default); 2365 JumpTableHeader JTH(First, Last, SV, CR.CaseBB, (CR.CaseBB == SwitchBB)); 2366 if (CR.CaseBB == SwitchBB) 2367 visitJumpTableHeader(JT, JTH, SwitchBB); 2368 2369 JTCases.push_back(JumpTableBlock(JTH, JT)); 2370 return true; 2371 } 2372 2373 /// handleBTSplitSwitchCase - emit comparison and split binary search tree into 2374 /// 2 subtrees. 2375 bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, 2376 CaseRecVector& WorkList, 2377 const Value* SV, 2378 MachineBasicBlock* SwitchBB) { 2379 Case& FrontCase = *CR.Range.first; 2380 Case& BackCase = *(CR.Range.second-1); 2381 2382 // Size is the number of Cases represented by this range. 2383 unsigned Size = CR.Range.second - CR.Range.first; 2384 2385 const APInt &First = FrontCase.Low->getValue(); 2386 const APInt &Last = BackCase.High->getValue(); 2387 double FMetric = 0; 2388 CaseItr Pivot = CR.Range.first + Size/2; 2389 2390 // Select optimal pivot, maximizing sum density of LHS and RHS. This will 2391 // (heuristically) allow us to emit JumpTable's later. 2392 APInt TSize(First.getBitWidth(), 0); 2393 for (CaseItr I = CR.Range.first, E = CR.Range.second; 2394 I!=E; ++I) 2395 TSize += I->size(); 2396 2397 APInt LSize = FrontCase.size(); 2398 APInt RSize = TSize-LSize; 2399 DEBUG(dbgs() << "Selecting best pivot: \n" 2400 << "First: " << First << ", Last: " << Last <<'\n' 2401 << "LSize: " << LSize << ", RSize: " << RSize << '\n'); 2402 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2403 for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second; 2404 J!=E; ++I, ++J) { 2405 const APInt &LEnd = I->High->getValue(); 2406 const APInt &RBegin = J->Low->getValue(); 2407 APInt Range = ComputeRange(LEnd, RBegin); 2408 assert((Range - 2ULL).isNonNegative() && 2409 "Invalid case distance"); 2410 // Use volatile double here to avoid excess precision issues on some hosts, 2411 // e.g. that use 80-bit X87 registers. 2412 // Only consider the density of sub-ranges that actually have sufficient 2413 // entries to be lowered as a jump table. 2414 volatile double LDensity = 2415 LSize.ult(TLI.getMinimumJumpTableEntries()) 2416 ? 0.0 2417 : LSize.roundToDouble() / (LEnd - First + 1ULL).roundToDouble(); 2418 volatile double RDensity = 2419 RSize.ult(TLI.getMinimumJumpTableEntries()) 2420 ? 0.0 2421 : RSize.roundToDouble() / (Last - RBegin + 1ULL).roundToDouble(); 2422 volatile double Metric = Range.logBase2() * (LDensity + RDensity); 2423 // Should always split in some non-trivial place 2424 DEBUG(dbgs() <<"=>Step\n" 2425 << "LEnd: " << LEnd << ", RBegin: " << RBegin << '\n' 2426 << "LDensity: " << LDensity 2427 << ", RDensity: " << RDensity << '\n' 2428 << "Metric: " << Metric << '\n'); 2429 if (FMetric < Metric) { 2430 Pivot = J; 2431 FMetric = Metric; 2432 DEBUG(dbgs() << "Current metric set to: " << FMetric << '\n'); 2433 } 2434 2435 LSize += J->size(); 2436 RSize -= J->size(); 2437 } 2438 2439 if (FMetric == 0 || !areJTsAllowed(TLI)) 2440 Pivot = CR.Range.first + Size/2; 2441 splitSwitchCase(CR, Pivot, WorkList, SV, SwitchBB); 2442 return true; 2443 } 2444 2445 void SelectionDAGBuilder::splitSwitchCase(CaseRec &CR, CaseItr Pivot, 2446 CaseRecVector &WorkList, 2447 const Value *SV, 2448 MachineBasicBlock *SwitchBB) { 2449 // Get the MachineFunction which holds the current MBB. This is used when 2450 // inserting any additional MBBs necessary to represent the switch. 2451 MachineFunction *CurMF = FuncInfo.MF; 2452 2453 // Figure out which block is immediately after the current one. 2454 MachineFunction::iterator BBI = CR.CaseBB; 2455 ++BBI; 2456 2457 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); 2458 2459 CaseRange LHSR(CR.Range.first, Pivot); 2460 CaseRange RHSR(Pivot, CR.Range.second); 2461 const ConstantInt *C = Pivot->Low; 2462 MachineBasicBlock *FalseBB = nullptr, *TrueBB = nullptr; 2463 2464 // We know that we branch to the LHS if the Value being switched on is 2465 // less than the Pivot value, C. We use this to optimize our binary 2466 // tree a bit, by recognizing that if SV is greater than or equal to the 2467 // LHS's Case Value, and that Case Value is exactly one less than the 2468 // Pivot's Value, then we can branch directly to the LHS's Target, 2469 // rather than creating a leaf node for it. 2470 if ((LHSR.second - LHSR.first) == 1 && LHSR.first->High == CR.GE && 2471 C->getValue() == (CR.GE->getValue() + 1LL)) { 2472 TrueBB = LHSR.first->BB; 2473 } else { 2474 TrueBB = CurMF->CreateMachineBasicBlock(LLVMBB); 2475 CurMF->insert(BBI, TrueBB); 2476 WorkList.push_back(CaseRec(TrueBB, C, CR.GE, LHSR)); 2477 2478 // Put SV in a virtual register to make it available from the new blocks. 2479 ExportFromCurrentBlock(SV); 2480 } 2481 2482 // Similar to the optimization above, if the Value being switched on is 2483 // known to be less than the Constant CR.LT, and the current Case Value 2484 // is CR.LT - 1, then we can branch directly to the target block for 2485 // the current Case Value, rather than emitting a RHS leaf node for it. 2486 if ((RHSR.second - RHSR.first) == 1 && CR.LT && 2487 RHSR.first->Low->getValue() == (CR.LT->getValue() - 1LL)) { 2488 FalseBB = RHSR.first->BB; 2489 } else { 2490 FalseBB = CurMF->CreateMachineBasicBlock(LLVMBB); 2491 CurMF->insert(BBI, FalseBB); 2492 WorkList.push_back(CaseRec(FalseBB, CR.LT, C, RHSR)); 2493 2494 // Put SV in a virtual register to make it available from the new blocks. 2495 ExportFromCurrentBlock(SV); 2496 } 2497 2498 // Create a CaseBlock record representing a conditional branch to 2499 // the LHS node if the value being switched on SV is less than C. 2500 // Otherwise, branch to LHS. 2501 CaseBlock CB(ISD::SETLT, SV, C, nullptr, TrueBB, FalseBB, CR.CaseBB); 2502 2503 if (CR.CaseBB == SwitchBB) 2504 visitSwitchCase(CB, SwitchBB); 2505 else 2506 SwitchCases.push_back(CB); 2507 } 2508 2509 /// handleBitTestsSwitchCase - if current case range has few destination and 2510 /// range span less, than machine word bitwidth, encode case range into series 2511 /// of masks and emit bit tests with these masks. 2512 bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, 2513 CaseRecVector& WorkList, 2514 const Value* SV, 2515 MachineBasicBlock* Default, 2516 MachineBasicBlock* SwitchBB) { 2517 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2518 EVT PTy = TLI.getPointerTy(); 2519 unsigned IntPtrBits = PTy.getSizeInBits(); 2520 2521 Case& FrontCase = *CR.Range.first; 2522 Case& BackCase = *(CR.Range.second-1); 2523 2524 // Get the MachineFunction which holds the current MBB. This is used when 2525 // inserting any additional MBBs necessary to represent the switch. 2526 MachineFunction *CurMF = FuncInfo.MF; 2527 2528 // If target does not have legal shift left, do not emit bit tests at all. 2529 if (!TLI.isOperationLegal(ISD::SHL, PTy)) 2530 return false; 2531 2532 size_t numCmps = 0; 2533 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { 2534 // Single case counts one, case range - two. 2535 numCmps += (I->Low == I->High ? 1 : 2); 2536 } 2537 2538 // Count unique destinations 2539 SmallSet<MachineBasicBlock*, 4> Dests; 2540 for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { 2541 Dests.insert(I->BB); 2542 if (Dests.size() > 3) 2543 // Don't bother the code below, if there are too much unique destinations 2544 return false; 2545 } 2546 DEBUG(dbgs() << "Total number of unique destinations: " 2547 << Dests.size() << '\n' 2548 << "Total number of comparisons: " << numCmps << '\n'); 2549 2550 // Compute span of values. 2551 const APInt& minValue = FrontCase.Low->getValue(); 2552 const APInt& maxValue = BackCase.High->getValue(); 2553 APInt cmpRange = maxValue - minValue; 2554 2555 DEBUG(dbgs() << "Compare range: " << cmpRange << '\n' 2556 << "Low bound: " << minValue << '\n' 2557 << "High bound: " << maxValue << '\n'); 2558 2559 if (cmpRange.uge(IntPtrBits) || 2560 (!(Dests.size() == 1 && numCmps >= 3) && 2561 !(Dests.size() == 2 && numCmps >= 5) && 2562 !(Dests.size() >= 3 && numCmps >= 6))) 2563 return false; 2564 2565 DEBUG(dbgs() << "Emitting bit tests\n"); 2566 APInt lowBound = APInt::getNullValue(cmpRange.getBitWidth()); 2567 2568 // Optimize the case where all the case values fit in a 2569 // word without having to subtract minValue. In this case, 2570 // we can optimize away the subtraction. 2571 if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { 2572 cmpRange = maxValue; 2573 } else { 2574 lowBound = minValue; 2575 } 2576 2577 CaseBitsVector CasesBits; 2578 unsigned i, count = 0; 2579 2580 for (CaseItr I = CR.Range.first, E = CR.Range.second; I!=E; ++I) { 2581 MachineBasicBlock* Dest = I->BB; 2582 for (i = 0; i < count; ++i) 2583 if (Dest == CasesBits[i].BB) 2584 break; 2585 2586 if (i == count) { 2587 assert((count < 3) && "Too much destinations to test!"); 2588 CasesBits.push_back(CaseBits(0, Dest, 0, 0/*Weight*/)); 2589 count++; 2590 } 2591 2592 const APInt& lowValue = I->Low->getValue(); 2593 const APInt& highValue = I->High->getValue(); 2594 2595 uint64_t lo = (lowValue - lowBound).getZExtValue(); 2596 uint64_t hi = (highValue - lowBound).getZExtValue(); 2597 CasesBits[i].ExtraWeight += I->ExtraWeight; 2598 2599 for (uint64_t j = lo; j <= hi; j++) { 2600 CasesBits[i].Mask |= 1ULL << j; 2601 CasesBits[i].Bits++; 2602 } 2603 2604 } 2605 std::sort(CasesBits.begin(), CasesBits.end(), CaseBitsCmp()); 2606 2607 BitTestInfo BTC; 2608 2609 // Figure out which block is immediately after the current one. 2610 MachineFunction::iterator BBI = CR.CaseBB; 2611 ++BBI; 2612 2613 const BasicBlock *LLVMBB = CR.CaseBB->getBasicBlock(); 2614 2615 DEBUG(dbgs() << "Cases:\n"); 2616 for (unsigned i = 0, e = CasesBits.size(); i!=e; ++i) { 2617 DEBUG(dbgs() << "Mask: " << CasesBits[i].Mask 2618 << ", Bits: " << CasesBits[i].Bits 2619 << ", BB: " << CasesBits[i].BB << '\n'); 2620 2621 MachineBasicBlock *CaseBB = CurMF->CreateMachineBasicBlock(LLVMBB); 2622 CurMF->insert(BBI, CaseBB); 2623 BTC.push_back(BitTestCase(CasesBits[i].Mask, 2624 CaseBB, 2625 CasesBits[i].BB, CasesBits[i].ExtraWeight)); 2626 2627 // Put SV in a virtual register to make it available from the new blocks. 2628 ExportFromCurrentBlock(SV); 2629 } 2630 2631 BitTestBlock BTB(lowBound, cmpRange, SV, 2632 -1U, MVT::Other, (CR.CaseBB == SwitchBB), 2633 CR.CaseBB, Default, std::move(BTC)); 2634 2635 if (CR.CaseBB == SwitchBB) 2636 visitBitTestHeader(BTB, SwitchBB); 2637 2638 BitTestCases.push_back(std::move(BTB)); 2639 2640 return true; 2641 } 2642 2643 void SelectionDAGBuilder::Clusterify(CaseVector &Cases, const SwitchInst *SI) { 2644 BranchProbabilityInfo *BPI = FuncInfo.BPI; 2645 2646 // Extract cases from the switch and sort them. 2647 typedef std::pair<const ConstantInt*, unsigned> CasePair; 2648 std::vector<CasePair> Sorted; 2649 Sorted.reserve(SI->getNumCases()); 2650 for (auto I : SI->cases()) 2651 Sorted.push_back(std::make_pair(I.getCaseValue(), I.getSuccessorIndex())); 2652 std::sort(Sorted.begin(), Sorted.end(), [](CasePair a, CasePair b) { 2653 return a.first->getValue().slt(b.first->getValue()); 2654 }); 2655 2656 // Merge adjacent cases with the same destination, build Cases vector. 2657 assert(Cases.empty() && "Cases should be empty before Clusterify;"); 2658 Cases.reserve(SI->getNumCases()); 2659 MachineBasicBlock *PreviousSucc = nullptr; 2660 for (CasePair &CP : Sorted) { 2661 const ConstantInt *CaseVal = CP.first; 2662 unsigned SuccIndex = CP.second; 2663 MachineBasicBlock *Succ = FuncInfo.MBBMap[SI->getSuccessor(SuccIndex)]; 2664 uint32_t Weight = BPI ? BPI->getEdgeWeight(SI->getParent(), SuccIndex) : 0; 2665 2666 if (PreviousSucc == Succ && 2667 (CaseVal->getValue() - Cases.back().High->getValue()) == 1) { 2668 // If this case has the same successor and is a neighbour, merge it into 2669 // the previous cluster. 2670 Cases.back().High = CaseVal; 2671 Cases.back().ExtraWeight += Weight; 2672 } else { 2673 Cases.push_back(Case(CaseVal, CaseVal, Succ, Weight)); 2674 } 2675 2676 PreviousSucc = Succ; 2677 } 2678 2679 DEBUG({ 2680 size_t numCmps = 0; 2681 for (auto &I : Cases) 2682 // A range counts double, since it requires two compares. 2683 numCmps += I.Low != I.High ? 2 : 1; 2684 2685 dbgs() << "Clusterify finished. Total clusters: " << Cases.size() 2686 << ". Total compares: " << numCmps << '\n'; 2687 }); 2688 } 2689 2690 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First, 2691 MachineBasicBlock *Last) { 2692 // Update JTCases. 2693 for (unsigned i = 0, e = JTCases.size(); i != e; ++i) 2694 if (JTCases[i].first.HeaderBB == First) 2695 JTCases[i].first.HeaderBB = Last; 2696 2697 // Update BitTestCases. 2698 for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i) 2699 if (BitTestCases[i].Parent == First) 2700 BitTestCases[i].Parent = Last; 2701 } 2702 2703 void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { 2704 MachineBasicBlock *SwitchMBB = FuncInfo.MBB; 2705 2706 // Create a vector of Cases, sorted so that we can efficiently create a binary 2707 // search tree from them. 2708 CaseVector Cases; 2709 Clusterify(Cases, &SI); 2710 2711 // Get the default destination MBB. 2712 MachineBasicBlock *Default = FuncInfo.MBBMap[SI.getDefaultDest()]; 2713 2714 if (isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg()) && 2715 !Cases.empty()) { 2716 // Replace an unreachable default destination with the most popular case 2717 // destination. 2718 DenseMap<const BasicBlock *, unsigned> Popularity; 2719 unsigned MaxPop = 0; 2720 const BasicBlock *MaxBB = nullptr; 2721 for (auto I : SI.cases()) { 2722 const BasicBlock *BB = I.getCaseSuccessor(); 2723 if (++Popularity[BB] > MaxPop) { 2724 MaxPop = Popularity[BB]; 2725 MaxBB = BB; 2726 } 2727 } 2728 2729 // Set new default. 2730 assert(MaxPop > 0); 2731 assert(MaxBB); 2732 Default = FuncInfo.MBBMap[MaxBB]; 2733 2734 // Remove cases that were pointing to the destination that is now the default. 2735 Cases.erase(std::remove_if(Cases.begin(), Cases.end(), 2736 [&](const Case &C) { return C.BB == Default; }), 2737 Cases.end()); 2738 } 2739 2740 // If there is only the default destination, go there directly. 2741 if (Cases.empty()) { 2742 // Update machine-CFG edges. 2743 SwitchMBB->addSuccessor(Default); 2744 2745 // If this is not a fall-through branch, emit the branch. 2746 if (Default != NextBlock(SwitchMBB)) { 2747 DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, 2748 getControlRoot(), DAG.getBasicBlock(Default))); 2749 } 2750 return; 2751 } 2752 2753 // Get the Value to be switched on. 2754 const Value *SV = SI.getCondition(); 2755 2756 // Push the initial CaseRec onto the worklist 2757 CaseRecVector WorkList; 2758 WorkList.push_back(CaseRec(SwitchMBB,nullptr,nullptr, 2759 CaseRange(Cases.begin(),Cases.end()))); 2760 2761 while (!WorkList.empty()) { 2762 // Grab a record representing a case range to process off the worklist 2763 CaseRec CR = WorkList.back(); 2764 WorkList.pop_back(); 2765 2766 if (handleBitTestsSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) 2767 continue; 2768 2769 // If the range has few cases (two or less) emit a series of specific 2770 // tests. 2771 if (handleSmallSwitchRange(CR, WorkList, SV, Default, SwitchMBB)) 2772 continue; 2773 2774 // If the switch has more than N blocks, and is at least 40% dense, and the 2775 // target supports indirect branches, then emit a jump table rather than 2776 // lowering the switch to a binary tree of conditional branches. 2777 // N defaults to 4 and is controlled via TLS.getMinimumJumpTableEntries(). 2778 if (handleJTSwitchCase(CR, WorkList, SV, Default, SwitchMBB)) 2779 continue; 2780 2781 // Emit binary tree. We need to pick a pivot, and push left and right ranges 2782 // onto the worklist. Leafs are handled via handleSmallSwitchRange() call. 2783 handleBTSplitSwitchCase(CR, WorkList, SV, SwitchMBB); 2784 } 2785 } 2786 2787 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { 2788 MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; 2789 2790 // Update machine-CFG edges with unique successors. 2791 SmallSet<BasicBlock*, 32> Done; 2792 for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) { 2793 BasicBlock *BB = I.getSuccessor(i); 2794 bool Inserted = Done.insert(BB).second; 2795 if (!Inserted) 2796 continue; 2797 2798 MachineBasicBlock *Succ = FuncInfo.MBBMap[BB]; 2799 addSuccessorWithWeight(IndirectBrMBB, Succ); 2800 } 2801 2802 DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), 2803 MVT::Other, getControlRoot(), 2804 getValue(I.getAddress()))); 2805 } 2806 2807 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) { 2808 if (DAG.getTarget().Options.TrapUnreachable) 2809 DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); 2810 } 2811 2812 void SelectionDAGBuilder::visitFSub(const User &I) { 2813 // -0.0 - X --> fneg 2814 Type *Ty = I.getType(); 2815 if (isa<Constant>(I.getOperand(0)) && 2816 I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { 2817 SDValue Op2 = getValue(I.getOperand(1)); 2818 setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), 2819 Op2.getValueType(), Op2)); 2820 return; 2821 } 2822 2823 visitBinary(I, ISD::FSUB); 2824 } 2825 2826 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { 2827 SDValue Op1 = getValue(I.getOperand(0)); 2828 SDValue Op2 = getValue(I.getOperand(1)); 2829 2830 bool nuw = false; 2831 bool nsw = false; 2832 bool exact = false; 2833 if (const OverflowingBinaryOperator *OFBinOp = 2834 dyn_cast<const OverflowingBinaryOperator>(&I)) { 2835 nuw = OFBinOp->hasNoUnsignedWrap(); 2836 nsw = OFBinOp->hasNoSignedWrap(); 2837 } 2838 if (const PossiblyExactOperator *ExactOp = 2839 dyn_cast<const PossiblyExactOperator>(&I)) 2840 exact = ExactOp->isExact(); 2841 2842 SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), 2843 Op1, Op2, nuw, nsw, exact); 2844 setValue(&I, BinNodeValue); 2845 } 2846 2847 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { 2848 SDValue Op1 = getValue(I.getOperand(0)); 2849 SDValue Op2 = getValue(I.getOperand(1)); 2850 2851 EVT ShiftTy = 2852 DAG.getTargetLoweringInfo().getShiftAmountTy(Op2.getValueType()); 2853 2854 // Coerce the shift amount to the right type if we can. 2855 if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { 2856 unsigned ShiftSize = ShiftTy.getSizeInBits(); 2857 unsigned Op2Size = Op2.getValueType().getSizeInBits(); 2858 SDLoc DL = getCurSDLoc(); 2859 2860 // If the operand is smaller than the shift count type, promote it. 2861 if (ShiftSize > Op2Size) 2862 Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2); 2863 2864 // If the operand is larger than the shift count type but the shift 2865 // count type has enough bits to represent any shift value, truncate 2866 // it now. This is a common case and it exposes the truncate to 2867 // optimization early. 2868 else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits())) 2869 Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2); 2870 // Otherwise we'll need to temporarily settle for some other convenient 2871 // type. Type legalization will make adjustments once the shiftee is split. 2872 else 2873 Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); 2874 } 2875 2876 bool nuw = false; 2877 bool nsw = false; 2878 bool exact = false; 2879 2880 if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { 2881 2882 if (const OverflowingBinaryOperator *OFBinOp = 2883 dyn_cast<const OverflowingBinaryOperator>(&I)) { 2884 nuw = OFBinOp->hasNoUnsignedWrap(); 2885 nsw = OFBinOp->hasNoSignedWrap(); 2886 } 2887 if (const PossiblyExactOperator *ExactOp = 2888 dyn_cast<const PossiblyExactOperator>(&I)) 2889 exact = ExactOp->isExact(); 2890 } 2891 2892 SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2, 2893 nuw, nsw, exact); 2894 setValue(&I, Res); 2895 } 2896 2897 void SelectionDAGBuilder::visitSDiv(const User &I) { 2898 SDValue Op1 = getValue(I.getOperand(0)); 2899 SDValue Op2 = getValue(I.getOperand(1)); 2900 2901 // Turn exact SDivs into multiplications. 2902 // FIXME: This should be in DAGCombiner, but it doesn't have access to the 2903 // exact bit. 2904 if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() && 2905 !isa<ConstantSDNode>(Op1) && 2906 isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue()) 2907 setValue(&I, DAG.getTargetLoweringInfo() 2908 .BuildExactSDIV(Op1, Op2, getCurSDLoc(), DAG)); 2909 else 2910 setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), 2911 Op1, Op2)); 2912 } 2913 2914 void SelectionDAGBuilder::visitICmp(const User &I) { 2915 ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE; 2916 if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I)) 2917 predicate = IC->getPredicate(); 2918 else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I)) 2919 predicate = ICmpInst::Predicate(IC->getPredicate()); 2920 SDValue Op1 = getValue(I.getOperand(0)); 2921 SDValue Op2 = getValue(I.getOperand(1)); 2922 ISD::CondCode Opcode = getICmpCondCode(predicate); 2923 2924 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2925 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); 2926 } 2927 2928 void SelectionDAGBuilder::visitFCmp(const User &I) { 2929 FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE; 2930 if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I)) 2931 predicate = FC->getPredicate(); 2932 else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I)) 2933 predicate = FCmpInst::Predicate(FC->getPredicate()); 2934 SDValue Op1 = getValue(I.getOperand(0)); 2935 SDValue Op2 = getValue(I.getOperand(1)); 2936 ISD::CondCode Condition = getFCmpCondCode(predicate); 2937 if (TM.Options.NoNaNsFPMath) 2938 Condition = getFCmpCodeWithoutNaN(Condition); 2939 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2940 setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); 2941 } 2942 2943 void SelectionDAGBuilder::visitSelect(const User &I) { 2944 SmallVector<EVT, 4> ValueVTs; 2945 ComputeValueVTs(DAG.getTargetLoweringInfo(), I.getType(), ValueVTs); 2946 unsigned NumValues = ValueVTs.size(); 2947 if (NumValues == 0) return; 2948 2949 SmallVector<SDValue, 4> Values(NumValues); 2950 SDValue Cond = getValue(I.getOperand(0)); 2951 SDValue TrueVal = getValue(I.getOperand(1)); 2952 SDValue FalseVal = getValue(I.getOperand(2)); 2953 ISD::NodeType OpCode = Cond.getValueType().isVector() ? 2954 ISD::VSELECT : ISD::SELECT; 2955 2956 for (unsigned i = 0; i != NumValues; ++i) 2957 Values[i] = DAG.getNode(OpCode, getCurSDLoc(), 2958 TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), 2959 Cond, 2960 SDValue(TrueVal.getNode(), 2961 TrueVal.getResNo() + i), 2962 SDValue(FalseVal.getNode(), 2963 FalseVal.getResNo() + i)); 2964 2965 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 2966 DAG.getVTList(ValueVTs), Values)); 2967 } 2968 2969 void SelectionDAGBuilder::visitTrunc(const User &I) { 2970 // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). 2971 SDValue N = getValue(I.getOperand(0)); 2972 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2973 setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); 2974 } 2975 2976 void SelectionDAGBuilder::visitZExt(const User &I) { 2977 // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 2978 // ZExt also can't be a cast to bool for same reason. So, nothing much to do 2979 SDValue N = getValue(I.getOperand(0)); 2980 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2981 setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); 2982 } 2983 2984 void SelectionDAGBuilder::visitSExt(const User &I) { 2985 // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). 2986 // SExt also can't be a cast to bool for same reason. So, nothing much to do 2987 SDValue N = getValue(I.getOperand(0)); 2988 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 2989 setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); 2990 } 2991 2992 void SelectionDAGBuilder::visitFPTrunc(const User &I) { 2993 // FPTrunc is never a no-op cast, no need to check 2994 SDValue N = getValue(I.getOperand(0)); 2995 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2996 EVT DestVT = TLI.getValueType(I.getType()); 2997 setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, 2998 DAG.getTargetConstant(0, TLI.getPointerTy()))); 2999 } 3000 3001 void SelectionDAGBuilder::visitFPExt(const User &I) { 3002 // FPExt is never a no-op cast, no need to check 3003 SDValue N = getValue(I.getOperand(0)); 3004 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 3005 setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); 3006 } 3007 3008 void SelectionDAGBuilder::visitFPToUI(const User &I) { 3009 // FPToUI is never a no-op cast, no need to check 3010 SDValue N = getValue(I.getOperand(0)); 3011 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 3012 setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); 3013 } 3014 3015 void SelectionDAGBuilder::visitFPToSI(const User &I) { 3016 // FPToSI is never a no-op cast, no need to check 3017 SDValue N = getValue(I.getOperand(0)); 3018 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 3019 setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); 3020 } 3021 3022 void SelectionDAGBuilder::visitUIToFP(const User &I) { 3023 // UIToFP is never a no-op cast, no need to check 3024 SDValue N = getValue(I.getOperand(0)); 3025 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 3026 setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); 3027 } 3028 3029 void SelectionDAGBuilder::visitSIToFP(const User &I) { 3030 // SIToFP is never a no-op cast, no need to check 3031 SDValue N = getValue(I.getOperand(0)); 3032 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 3033 setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); 3034 } 3035 3036 void SelectionDAGBuilder::visitPtrToInt(const User &I) { 3037 // What to do depends on the size of the integer and the size of the pointer. 3038 // We can either truncate, zero extend, or no-op, accordingly. 3039 SDValue N = getValue(I.getOperand(0)); 3040 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 3041 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); 3042 } 3043 3044 void SelectionDAGBuilder::visitIntToPtr(const User &I) { 3045 // What to do depends on the size of the integer and the size of the pointer. 3046 // We can either truncate, zero extend, or no-op, accordingly. 3047 SDValue N = getValue(I.getOperand(0)); 3048 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 3049 setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); 3050 } 3051 3052 void SelectionDAGBuilder::visitBitCast(const User &I) { 3053 SDValue N = getValue(I.getOperand(0)); 3054 EVT DestVT = DAG.getTargetLoweringInfo().getValueType(I.getType()); 3055 3056 // BitCast assures us that source and destination are the same size so this is 3057 // either a BITCAST or a no-op. 3058 if (DestVT != N.getValueType()) 3059 setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), 3060 DestVT, N)); // convert types. 3061 // Check if the original LLVM IR Operand was a ConstantInt, because getValue() 3062 // might fold any kind of constant expression to an integer constant and that 3063 // is not what we are looking for. Only regcognize a bitcast of a genuine 3064 // constant integer as an opaque constant. 3065 else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0))) 3066 setValue(&I, DAG.getConstant(C->getValue(), DestVT, /*isTarget=*/false, 3067 /*isOpaque*/true)); 3068 else 3069 setValue(&I, N); // noop cast. 3070 } 3071 3072 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { 3073 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3074 const Value *SV = I.getOperand(0); 3075 SDValue N = getValue(SV); 3076 EVT DestVT = TLI.getValueType(I.getType()); 3077 3078 unsigned SrcAS = SV->getType()->getPointerAddressSpace(); 3079 unsigned DestAS = I.getType()->getPointerAddressSpace(); 3080 3081 if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) 3082 N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); 3083 3084 setValue(&I, N); 3085 } 3086 3087 void SelectionDAGBuilder::visitInsertElement(const User &I) { 3088 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3089 SDValue InVec = getValue(I.getOperand(0)); 3090 SDValue InVal = getValue(I.getOperand(1)); 3091 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), 3092 getCurSDLoc(), TLI.getVectorIdxTy()); 3093 setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), 3094 TLI.getValueType(I.getType()), InVec, InVal, InIdx)); 3095 } 3096 3097 void SelectionDAGBuilder::visitExtractElement(const User &I) { 3098 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3099 SDValue InVec = getValue(I.getOperand(0)); 3100 SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), 3101 getCurSDLoc(), TLI.getVectorIdxTy()); 3102 setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), 3103 TLI.getValueType(I.getType()), InVec, InIdx)); 3104 } 3105 3106 // Utility for visitShuffleVector - Return true if every element in Mask, 3107 // beginning from position Pos and ending in Pos+Size, falls within the 3108 // specified sequential range [L, L+Pos). or is undef. 3109 static bool isSequentialInRange(const SmallVectorImpl<int> &Mask, 3110 unsigned Pos, unsigned Size, int Low) { 3111 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low) 3112 if (Mask[i] >= 0 && Mask[i] != Low) 3113 return false; 3114 return true; 3115 } 3116 3117 void SelectionDAGBuilder::visitShuffleVector(const User &I) { 3118 SDValue Src1 = getValue(I.getOperand(0)); 3119 SDValue Src2 = getValue(I.getOperand(1)); 3120 3121 SmallVector<int, 8> Mask; 3122 ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask); 3123 unsigned MaskNumElts = Mask.size(); 3124 3125 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3126 EVT VT = TLI.getValueType(I.getType()); 3127 EVT SrcVT = Src1.getValueType(); 3128 unsigned SrcNumElts = SrcVT.getVectorNumElements(); 3129 3130 if (SrcNumElts == MaskNumElts) { 3131 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 3132 &Mask[0])); 3133 return; 3134 } 3135 3136 // Normalize the shuffle vector since mask and vector length don't match. 3137 if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) { 3138 // Mask is longer than the source vectors and is a multiple of the source 3139 // vectors. We can use concatenate vector to make the mask and vectors 3140 // lengths match. 3141 if (SrcNumElts*2 == MaskNumElts) { 3142 // First check for Src1 in low and Src2 in high 3143 if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && 3144 isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { 3145 // The shuffle is concatenating two vectors together. 3146 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), 3147 VT, Src1, Src2)); 3148 return; 3149 } 3150 // Then check for Src2 in low and Src1 in high 3151 if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && 3152 isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { 3153 // The shuffle is concatenating two vectors together. 3154 setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), 3155 VT, Src2, Src1)); 3156 return; 3157 } 3158 } 3159 3160 // Pad both vectors with undefs to make them the same length as the mask. 3161 unsigned NumConcat = MaskNumElts / SrcNumElts; 3162 bool Src1U = Src1.getOpcode() == ISD::UNDEF; 3163 bool Src2U = Src2.getOpcode() == ISD::UNDEF; 3164 SDValue UndefVal = DAG.getUNDEF(SrcVT); 3165 3166 SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal); 3167 SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal); 3168 MOps1[0] = Src1; 3169 MOps2[0] = Src2; 3170 3171 Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 3172 getCurSDLoc(), VT, MOps1); 3173 Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, 3174 getCurSDLoc(), VT, MOps2); 3175 3176 // Readjust mask for new input vector length. 3177 SmallVector<int, 8> MappedOps; 3178 for (unsigned i = 0; i != MaskNumElts; ++i) { 3179 int Idx = Mask[i]; 3180 if (Idx >= (int)SrcNumElts) 3181 Idx -= SrcNumElts - MaskNumElts; 3182 MappedOps.push_back(Idx); 3183 } 3184 3185 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 3186 &MappedOps[0])); 3187 return; 3188 } 3189 3190 if (SrcNumElts > MaskNumElts) { 3191 // Analyze the access pattern of the vector to see if we can extract 3192 // two subvectors and do the shuffle. The analysis is done by calculating 3193 // the range of elements the mask access on both vectors. 3194 int MinRange[2] = { static_cast<int>(SrcNumElts), 3195 static_cast<int>(SrcNumElts)}; 3196 int MaxRange[2] = {-1, -1}; 3197 3198 for (unsigned i = 0; i != MaskNumElts; ++i) { 3199 int Idx = Mask[i]; 3200 unsigned Input = 0; 3201 if (Idx < 0) 3202 continue; 3203 3204 if (Idx >= (int)SrcNumElts) { 3205 Input = 1; 3206 Idx -= SrcNumElts; 3207 } 3208 if (Idx > MaxRange[Input]) 3209 MaxRange[Input] = Idx; 3210 if (Idx < MinRange[Input]) 3211 MinRange[Input] = Idx; 3212 } 3213 3214 // Check if the access is smaller than the vector size and can we find 3215 // a reasonable extract index. 3216 int RangeUse[2] = { -1, -1 }; // 0 = Unused, 1 = Extract, -1 = Can not 3217 // Extract. 3218 int StartIdx[2]; // StartIdx to extract from 3219 for (unsigned Input = 0; Input < 2; ++Input) { 3220 if (MinRange[Input] >= (int)SrcNumElts && MaxRange[Input] < 0) { 3221 RangeUse[Input] = 0; // Unused 3222 StartIdx[Input] = 0; 3223 continue; 3224 } 3225 3226 // Find a good start index that is a multiple of the mask length. Then 3227 // see if the rest of the elements are in range. 3228 StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts; 3229 if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts && 3230 StartIdx[Input] + MaskNumElts <= SrcNumElts) 3231 RangeUse[Input] = 1; // Extract from a multiple of the mask length. 3232 } 3233 3234 if (RangeUse[0] == 0 && RangeUse[1] == 0) { 3235 setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used. 3236 return; 3237 } 3238 if (RangeUse[0] >= 0 && RangeUse[1] >= 0) { 3239 // Extract appropriate subvector and generate a vector shuffle 3240 for (unsigned Input = 0; Input < 2; ++Input) { 3241 SDValue &Src = Input == 0 ? Src1 : Src2; 3242 if (RangeUse[Input] == 0) 3243 Src = DAG.getUNDEF(VT); 3244 else 3245 Src = DAG.getNode( 3246 ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, Src, 3247 DAG.getConstant(StartIdx[Input], TLI.getVectorIdxTy())); 3248 } 3249 3250 // Calculate new mask. 3251 SmallVector<int, 8> MappedOps; 3252 for (unsigned i = 0; i != MaskNumElts; ++i) { 3253 int Idx = Mask[i]; 3254 if (Idx >= 0) { 3255 if (Idx < (int)SrcNumElts) 3256 Idx -= StartIdx[0]; 3257 else 3258 Idx -= SrcNumElts + StartIdx[1] - MaskNumElts; 3259 } 3260 MappedOps.push_back(Idx); 3261 } 3262 3263 setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, 3264 &MappedOps[0])); 3265 return; 3266 } 3267 } 3268 3269 // We can't use either concat vectors or extract subvectors so fall back to 3270 // replacing the shuffle with extract and build vector. 3271 // to insert and build vector. 3272 EVT EltVT = VT.getVectorElementType(); 3273 EVT IdxVT = TLI.getVectorIdxTy(); 3274 SmallVector<SDValue,8> Ops; 3275 for (unsigned i = 0; i != MaskNumElts; ++i) { 3276 int Idx = Mask[i]; 3277 SDValue Res; 3278 3279 if (Idx < 0) { 3280 Res = DAG.getUNDEF(EltVT); 3281 } else { 3282 SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2; 3283 if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; 3284 3285 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), 3286 EltVT, Src, DAG.getConstant(Idx, IdxVT)); 3287 } 3288 3289 Ops.push_back(Res); 3290 } 3291 3292 setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, Ops)); 3293 } 3294 3295 void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { 3296 const Value *Op0 = I.getOperand(0); 3297 const Value *Op1 = I.getOperand(1); 3298 Type *AggTy = I.getType(); 3299 Type *ValTy = Op1->getType(); 3300 bool IntoUndef = isa<UndefValue>(Op0); 3301 bool FromUndef = isa<UndefValue>(Op1); 3302 3303 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); 3304 3305 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3306 SmallVector<EVT, 4> AggValueVTs; 3307 ComputeValueVTs(TLI, AggTy, AggValueVTs); 3308 SmallVector<EVT, 4> ValValueVTs; 3309 ComputeValueVTs(TLI, ValTy, ValValueVTs); 3310 3311 unsigned NumAggValues = AggValueVTs.size(); 3312 unsigned NumValValues = ValValueVTs.size(); 3313 SmallVector<SDValue, 4> Values(NumAggValues); 3314 3315 // Ignore an insertvalue that produces an empty object 3316 if (!NumAggValues) { 3317 setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); 3318 return; 3319 } 3320 3321 SDValue Agg = getValue(Op0); 3322 unsigned i = 0; 3323 // Copy the beginning value(s) from the original aggregate. 3324 for (; i != LinearIndex; ++i) 3325 Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : 3326 SDValue(Agg.getNode(), Agg.getResNo() + i); 3327 // Copy values from the inserted value(s). 3328 if (NumValValues) { 3329 SDValue Val = getValue(Op1); 3330 for (; i != LinearIndex + NumValValues; ++i) 3331 Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) : 3332 SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex); 3333 } 3334 // Copy remaining value(s) from the original aggregate. 3335 for (; i != NumAggValues; ++i) 3336 Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : 3337 SDValue(Agg.getNode(), Agg.getResNo() + i); 3338 3339 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 3340 DAG.getVTList(AggValueVTs), Values)); 3341 } 3342 3343 void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { 3344 const Value *Op0 = I.getOperand(0); 3345 Type *AggTy = Op0->getType(); 3346 Type *ValTy = I.getType(); 3347 bool OutOfUndef = isa<UndefValue>(Op0); 3348 3349 unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); 3350 3351 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3352 SmallVector<EVT, 4> ValValueVTs; 3353 ComputeValueVTs(TLI, ValTy, ValValueVTs); 3354 3355 unsigned NumValValues = ValValueVTs.size(); 3356 3357 // Ignore a extractvalue that produces an empty object 3358 if (!NumValValues) { 3359 setValue(&I, DAG.getUNDEF(MVT(MVT::Other))); 3360 return; 3361 } 3362 3363 SmallVector<SDValue, 4> Values(NumValValues); 3364 3365 SDValue Agg = getValue(Op0); 3366 // Copy out the selected value(s). 3367 for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i) 3368 Values[i - LinearIndex] = 3369 OutOfUndef ? 3370 DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : 3371 SDValue(Agg.getNode(), Agg.getResNo() + i); 3372 3373 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 3374 DAG.getVTList(ValValueVTs), Values)); 3375 } 3376 3377 void SelectionDAGBuilder::visitGetElementPtr(const User &I) { 3378 Value *Op0 = I.getOperand(0); 3379 // Note that the pointer operand may be a vector of pointers. Take the scalar 3380 // element which holds a pointer. 3381 Type *Ty = Op0->getType()->getScalarType(); 3382 unsigned AS = Ty->getPointerAddressSpace(); 3383 SDValue N = getValue(Op0); 3384 3385 for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); 3386 OI != E; ++OI) { 3387 const Value *Idx = *OI; 3388 if (StructType *StTy = dyn_cast<StructType>(Ty)) { 3389 unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue(); 3390 if (Field) { 3391 // N = N + Offset 3392 uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field); 3393 N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, 3394 DAG.getConstant(Offset, N.getValueType())); 3395 } 3396 3397 Ty = StTy->getElementType(Field); 3398 } else { 3399 Ty = cast<SequentialType>(Ty)->getElementType(); 3400 MVT PtrTy = DAG.getTargetLoweringInfo().getPointerTy(AS); 3401 unsigned PtrSize = PtrTy.getSizeInBits(); 3402 APInt ElementSize(PtrSize, DL->getTypeAllocSize(Ty)); 3403 3404 // If this is a constant subscript, handle it quickly. 3405 if (const auto *CI = dyn_cast<ConstantInt>(Idx)) { 3406 if (CI->isZero()) 3407 continue; 3408 APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize); 3409 SDValue OffsVal = DAG.getConstant(Offs, PtrTy); 3410 N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); 3411 continue; 3412 } 3413 3414 // N = N + Idx * ElementSize; 3415 SDValue IdxN = getValue(Idx); 3416 3417 // If the index is smaller or larger than intptr_t, truncate or extend 3418 // it. 3419 IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); 3420 3421 // If this is a multiply by a power of two, turn it into a shl 3422 // immediately. This is a very common case. 3423 if (ElementSize != 1) { 3424 if (ElementSize.isPowerOf2()) { 3425 unsigned Amt = ElementSize.logBase2(); 3426 IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), 3427 N.getValueType(), IdxN, 3428 DAG.getConstant(Amt, IdxN.getValueType())); 3429 } else { 3430 SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); 3431 IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), 3432 N.getValueType(), IdxN, Scale); 3433 } 3434 } 3435 3436 N = DAG.getNode(ISD::ADD, getCurSDLoc(), 3437 N.getValueType(), N, IdxN); 3438 } 3439 } 3440 3441 setValue(&I, N); 3442 } 3443 3444 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { 3445 // If this is a fixed sized alloca in the entry block of the function, 3446 // allocate it statically on the stack. 3447 if (FuncInfo.StaticAllocaMap.count(&I)) 3448 return; // getValue will auto-populate this. 3449 3450 Type *Ty = I.getAllocatedType(); 3451 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3452 uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); 3453 unsigned Align = 3454 std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), 3455 I.getAlignment()); 3456 3457 SDValue AllocSize = getValue(I.getArraySize()); 3458 3459 EVT IntPtr = TLI.getPointerTy(); 3460 if (AllocSize.getValueType() != IntPtr) 3461 AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); 3462 3463 AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, 3464 AllocSize, 3465 DAG.getConstant(TySize, IntPtr)); 3466 3467 // Handle alignment. If the requested alignment is less than or equal to 3468 // the stack alignment, ignore it. If the size is greater than or equal to 3469 // the stack alignment, we note this in the DYNAMIC_STACKALLOC node. 3470 unsigned StackAlign = 3471 DAG.getSubtarget().getFrameLowering()->getStackAlignment(); 3472 if (Align <= StackAlign) 3473 Align = 0; 3474 3475 // Round the size of the allocation up to the stack alignment size 3476 // by add SA-1 to the size. 3477 AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), 3478 AllocSize.getValueType(), AllocSize, 3479 DAG.getIntPtrConstant(StackAlign-1)); 3480 3481 // Mask out the low bits for alignment purposes. 3482 AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), 3483 AllocSize.getValueType(), AllocSize, 3484 DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); 3485 3486 SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; 3487 SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); 3488 SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops); 3489 setValue(&I, DSA); 3490 DAG.setRoot(DSA.getValue(1)); 3491 3492 assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects()); 3493 } 3494 3495 void SelectionDAGBuilder::visitLoad(const LoadInst &I) { 3496 if (I.isAtomic()) 3497 return visitAtomicLoad(I); 3498 3499 const Value *SV = I.getOperand(0); 3500 SDValue Ptr = getValue(SV); 3501 3502 Type *Ty = I.getType(); 3503 3504 bool isVolatile = I.isVolatile(); 3505 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; 3506 bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr; 3507 unsigned Alignment = I.getAlignment(); 3508 3509 AAMDNodes AAInfo; 3510 I.getAAMetadata(AAInfo); 3511 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); 3512 3513 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3514 SmallVector<EVT, 4> ValueVTs; 3515 SmallVector<uint64_t, 4> Offsets; 3516 ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); 3517 unsigned NumValues = ValueVTs.size(); 3518 if (NumValues == 0) 3519 return; 3520 3521 SDValue Root; 3522 bool ConstantMemory = false; 3523 if (isVolatile || NumValues > MaxParallelChains) 3524 // Serialize volatile loads with other side effects. 3525 Root = getRoot(); 3526 else if (AA->pointsToConstantMemory( 3527 AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), AAInfo))) { 3528 // Do not serialize (non-volatile) loads of constant memory with anything. 3529 Root = DAG.getEntryNode(); 3530 ConstantMemory = true; 3531 } else { 3532 // Do not serialize non-volatile loads against each other. 3533 Root = DAG.getRoot(); 3534 } 3535 3536 if (isVolatile) 3537 Root = TLI.prepareVolatileOrAtomicLoad(Root, getCurSDLoc(), DAG); 3538 3539 SmallVector<SDValue, 4> Values(NumValues); 3540 SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), 3541 NumValues)); 3542 EVT PtrVT = Ptr.getValueType(); 3543 unsigned ChainI = 0; 3544 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { 3545 // Serializing loads here may result in excessive register pressure, and 3546 // TokenFactor places arbitrary choke points on the scheduler. SD scheduling 3547 // could recover a bit by hoisting nodes upward in the chain by recognizing 3548 // they are side-effect free or do not alias. The optimizer should really 3549 // avoid this case by converting large object/array copies to llvm.memcpy 3550 // (MaxParallelChains should always remain as failsafe). 3551 if (ChainI == MaxParallelChains) { 3552 assert(PendingLoads.empty() && "PendingLoads must be serialized first"); 3553 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 3554 makeArrayRef(Chains.data(), ChainI)); 3555 Root = Chain; 3556 ChainI = 0; 3557 } 3558 SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), 3559 PtrVT, Ptr, 3560 DAG.getConstant(Offsets[i], PtrVT)); 3561 SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, 3562 A, MachinePointerInfo(SV, Offsets[i]), isVolatile, 3563 isNonTemporal, isInvariant, Alignment, AAInfo, 3564 Ranges); 3565 3566 Values[i] = L; 3567 Chains[ChainI] = L.getValue(1); 3568 } 3569 3570 if (!ConstantMemory) { 3571 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 3572 makeArrayRef(Chains.data(), ChainI)); 3573 if (isVolatile) 3574 DAG.setRoot(Chain); 3575 else 3576 PendingLoads.push_back(Chain); 3577 } 3578 3579 setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), 3580 DAG.getVTList(ValueVTs), Values)); 3581 } 3582 3583 void SelectionDAGBuilder::visitStore(const StoreInst &I) { 3584 if (I.isAtomic()) 3585 return visitAtomicStore(I); 3586 3587 const Value *SrcV = I.getOperand(0); 3588 const Value *PtrV = I.getOperand(1); 3589 3590 SmallVector<EVT, 4> ValueVTs; 3591 SmallVector<uint64_t, 4> Offsets; 3592 ComputeValueVTs(DAG.getTargetLoweringInfo(), SrcV->getType(), 3593 ValueVTs, &Offsets); 3594 unsigned NumValues = ValueVTs.size(); 3595 if (NumValues == 0) 3596 return; 3597 3598 // Get the lowered operands. Note that we do this after 3599 // checking if NumResults is zero, because with zero results 3600 // the operands won't have values in the map. 3601 SDValue Src = getValue(SrcV); 3602 SDValue Ptr = getValue(PtrV); 3603 3604 SDValue Root = getRoot(); 3605 SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains), 3606 NumValues)); 3607 EVT PtrVT = Ptr.getValueType(); 3608 bool isVolatile = I.isVolatile(); 3609 bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr; 3610 unsigned Alignment = I.getAlignment(); 3611 3612 AAMDNodes AAInfo; 3613 I.getAAMetadata(AAInfo); 3614 3615 unsigned ChainI = 0; 3616 for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { 3617 // See visitLoad comments. 3618 if (ChainI == MaxParallelChains) { 3619 SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 3620 makeArrayRef(Chains.data(), ChainI)); 3621 Root = Chain; 3622 ChainI = 0; 3623 } 3624 SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, 3625 DAG.getConstant(Offsets[i], PtrVT)); 3626 SDValue St = DAG.getStore(Root, getCurSDLoc(), 3627 SDValue(Src.getNode(), Src.getResNo() + i), 3628 Add, MachinePointerInfo(PtrV, Offsets[i]), 3629 isVolatile, isNonTemporal, Alignment, AAInfo); 3630 Chains[ChainI] = St; 3631 } 3632 3633 SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, 3634 makeArrayRef(Chains.data(), ChainI)); 3635 DAG.setRoot(StoreNode); 3636 } 3637 3638 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) { 3639 SDLoc sdl = getCurSDLoc(); 3640 3641 // llvm.masked.store.*(Src0, Ptr, alignemt, Mask) 3642 Value *PtrOperand = I.getArgOperand(1); 3643 SDValue Ptr = getValue(PtrOperand); 3644 SDValue Src0 = getValue(I.getArgOperand(0)); 3645 SDValue Mask = getValue(I.getArgOperand(3)); 3646 EVT VT = Src0.getValueType(); 3647 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue(); 3648 if (!Alignment) 3649 Alignment = DAG.getEVTAlignment(VT); 3650 3651 AAMDNodes AAInfo; 3652 I.getAAMetadata(AAInfo); 3653 3654 MachineMemOperand *MMO = 3655 DAG.getMachineFunction(). 3656 getMachineMemOperand(MachinePointerInfo(PtrOperand), 3657 MachineMemOperand::MOStore, VT.getStoreSize(), 3658 Alignment, AAInfo); 3659 SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT, 3660 MMO, false); 3661 DAG.setRoot(StoreNode); 3662 setValue(&I, StoreNode); 3663 } 3664 3665 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) { 3666 SDLoc sdl = getCurSDLoc(); 3667 3668 // @llvm.masked.load.*(Ptr, alignment, Mask, Src0) 3669 Value *PtrOperand = I.getArgOperand(0); 3670 SDValue Ptr = getValue(PtrOperand); 3671 SDValue Src0 = getValue(I.getArgOperand(3)); 3672 SDValue Mask = getValue(I.getArgOperand(2)); 3673 3674 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3675 EVT VT = TLI.getValueType(I.getType()); 3676 unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue(); 3677 if (!Alignment) 3678 Alignment = DAG.getEVTAlignment(VT); 3679 3680 AAMDNodes AAInfo; 3681 I.getAAMetadata(AAInfo); 3682 const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range); 3683 3684 SDValue InChain = DAG.getRoot(); 3685 if (AA->pointsToConstantMemory( 3686 AliasAnalysis::Location(PtrOperand, 3687 AA->getTypeStoreSize(I.getType()), 3688 AAInfo))) { 3689 // Do not serialize (non-volatile) loads of constant memory with anything. 3690 InChain = DAG.getEntryNode(); 3691 } 3692 3693 MachineMemOperand *MMO = 3694 DAG.getMachineFunction(). 3695 getMachineMemOperand(MachinePointerInfo(PtrOperand), 3696 MachineMemOperand::MOLoad, VT.getStoreSize(), 3697 Alignment, AAInfo, Ranges); 3698 3699 SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO, 3700 ISD::NON_EXTLOAD); 3701 SDValue OutChain = Load.getValue(1); 3702 DAG.setRoot(OutChain); 3703 setValue(&I, Load); 3704 } 3705 3706 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { 3707 SDLoc dl = getCurSDLoc(); 3708 AtomicOrdering SuccessOrder = I.getSuccessOrdering(); 3709 AtomicOrdering FailureOrder = I.getFailureOrdering(); 3710 SynchronizationScope Scope = I.getSynchScope(); 3711 3712 SDValue InChain = getRoot(); 3713 3714 MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); 3715 SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); 3716 SDValue L = DAG.getAtomicCmpSwap( 3717 ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, 3718 getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), 3719 getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 3720 /*Alignment=*/ 0, SuccessOrder, FailureOrder, Scope); 3721 3722 SDValue OutChain = L.getValue(2); 3723 3724 setValue(&I, L); 3725 DAG.setRoot(OutChain); 3726 } 3727 3728 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { 3729 SDLoc dl = getCurSDLoc(); 3730 ISD::NodeType NT; 3731 switch (I.getOperation()) { 3732 default: llvm_unreachable("Unknown atomicrmw operation"); 3733 case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break; 3734 case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break; 3735 case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break; 3736 case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break; 3737 case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break; 3738 case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break; 3739 case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break; 3740 case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break; 3741 case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break; 3742 case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break; 3743 case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break; 3744 } 3745 AtomicOrdering Order = I.getOrdering(); 3746 SynchronizationScope Scope = I.getSynchScope(); 3747 3748 SDValue InChain = getRoot(); 3749 3750 SDValue L = 3751 DAG.getAtomic(NT, dl, 3752 getValue(I.getValOperand()).getSimpleValueType(), 3753 InChain, 3754 getValue(I.getPointerOperand()), 3755 getValue(I.getValOperand()), 3756 I.getPointerOperand(), 3757 /* Alignment=*/ 0, Order, Scope); 3758 3759 SDValue OutChain = L.getValue(1); 3760 3761 setValue(&I, L); 3762 DAG.setRoot(OutChain); 3763 } 3764 3765 void SelectionDAGBuilder::visitFence(const FenceInst &I) { 3766 SDLoc dl = getCurSDLoc(); 3767 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3768 SDValue Ops[3]; 3769 Ops[0] = getRoot(); 3770 Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); 3771 Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); 3772 DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops)); 3773 } 3774 3775 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { 3776 SDLoc dl = getCurSDLoc(); 3777 AtomicOrdering Order = I.getOrdering(); 3778 SynchronizationScope Scope = I.getSynchScope(); 3779 3780 SDValue InChain = getRoot(); 3781 3782 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3783 EVT VT = TLI.getValueType(I.getType()); 3784 3785 if (I.getAlignment() < VT.getSizeInBits() / 8) 3786 report_fatal_error("Cannot generate unaligned atomic load"); 3787 3788 MachineMemOperand *MMO = 3789 DAG.getMachineFunction(). 3790 getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), 3791 MachineMemOperand::MOVolatile | 3792 MachineMemOperand::MOLoad, 3793 VT.getStoreSize(), 3794 I.getAlignment() ? I.getAlignment() : 3795 DAG.getEVTAlignment(VT)); 3796 3797 InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG); 3798 SDValue L = 3799 DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, 3800 getValue(I.getPointerOperand()), MMO, 3801 Order, Scope); 3802 3803 SDValue OutChain = L.getValue(1); 3804 3805 setValue(&I, L); 3806 DAG.setRoot(OutChain); 3807 } 3808 3809 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { 3810 SDLoc dl = getCurSDLoc(); 3811 3812 AtomicOrdering Order = I.getOrdering(); 3813 SynchronizationScope Scope = I.getSynchScope(); 3814 3815 SDValue InChain = getRoot(); 3816 3817 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3818 EVT VT = TLI.getValueType(I.getValueOperand()->getType()); 3819 3820 if (I.getAlignment() < VT.getSizeInBits() / 8) 3821 report_fatal_error("Cannot generate unaligned atomic store"); 3822 3823 SDValue OutChain = 3824 DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, 3825 InChain, 3826 getValue(I.getPointerOperand()), 3827 getValue(I.getValueOperand()), 3828 I.getPointerOperand(), I.getAlignment(), 3829 Order, Scope); 3830 3831 DAG.setRoot(OutChain); 3832 } 3833 3834 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC 3835 /// node. 3836 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, 3837 unsigned Intrinsic) { 3838 bool HasChain = !I.doesNotAccessMemory(); 3839 bool OnlyLoad = HasChain && I.onlyReadsMemory(); 3840 3841 // Build the operand list. 3842 SmallVector<SDValue, 8> Ops; 3843 if (HasChain) { // If this intrinsic has side-effects, chainify it. 3844 if (OnlyLoad) { 3845 // We don't need to serialize loads against other loads. 3846 Ops.push_back(DAG.getRoot()); 3847 } else { 3848 Ops.push_back(getRoot()); 3849 } 3850 } 3851 3852 // Info is set by getTgtMemInstrinsic 3853 TargetLowering::IntrinsicInfo Info; 3854 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3855 bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); 3856 3857 // Add the intrinsic ID as an integer operand if it's not a target intrinsic. 3858 if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || 3859 Info.opc == ISD::INTRINSIC_W_CHAIN) 3860 Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); 3861 3862 // Add all operands of the call to the operand list. 3863 for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { 3864 SDValue Op = getValue(I.getArgOperand(i)); 3865 Ops.push_back(Op); 3866 } 3867 3868 SmallVector<EVT, 4> ValueVTs; 3869 ComputeValueVTs(TLI, I.getType(), ValueVTs); 3870 3871 if (HasChain) 3872 ValueVTs.push_back(MVT::Other); 3873 3874 SDVTList VTs = DAG.getVTList(ValueVTs); 3875 3876 // Create the node. 3877 SDValue Result; 3878 if (IsTgtIntrinsic) { 3879 // This is target intrinsic that touches memory 3880 Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), 3881 VTs, Ops, Info.memVT, 3882 MachinePointerInfo(Info.ptrVal, Info.offset), 3883 Info.align, Info.vol, 3884 Info.readMem, Info.writeMem, Info.size); 3885 } else if (!HasChain) { 3886 Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops); 3887 } else if (!I.getType()->isVoidTy()) { 3888 Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops); 3889 } else { 3890 Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops); 3891 } 3892 3893 if (HasChain) { 3894 SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1); 3895 if (OnlyLoad) 3896 PendingLoads.push_back(Chain); 3897 else 3898 DAG.setRoot(Chain); 3899 } 3900 3901 if (!I.getType()->isVoidTy()) { 3902 if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) { 3903 EVT VT = TLI.getValueType(PTy); 3904 Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); 3905 } 3906 3907 setValue(&I, Result); 3908 } 3909 } 3910 3911 /// GetSignificand - Get the significand and build it into a floating-point 3912 /// number with exponent of 1: 3913 /// 3914 /// Op = (Op & 0x007fffff) | 0x3f800000; 3915 /// 3916 /// where Op is the hexadecimal representation of floating point value. 3917 static SDValue 3918 GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { 3919 SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, 3920 DAG.getConstant(0x007fffff, MVT::i32)); 3921 SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, 3922 DAG.getConstant(0x3f800000, MVT::i32)); 3923 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2); 3924 } 3925 3926 /// GetExponent - Get the exponent: 3927 /// 3928 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); 3929 /// 3930 /// where Op is the hexadecimal representation of floating point value. 3931 static SDValue 3932 GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, 3933 SDLoc dl) { 3934 SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, 3935 DAG.getConstant(0x7f800000, MVT::i32)); 3936 SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, 3937 DAG.getConstant(23, TLI.getPointerTy())); 3938 SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1, 3939 DAG.getConstant(127, MVT::i32)); 3940 return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2); 3941 } 3942 3943 /// getF32Constant - Get 32-bit floating point constant. 3944 static SDValue 3945 getF32Constant(SelectionDAG &DAG, unsigned Flt) { 3946 return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), 3947 MVT::f32); 3948 } 3949 3950 static SDValue getLimitedPrecisionExp2(SDValue t0, SDLoc dl, 3951 SelectionDAG &DAG) { 3952 // IntegerPartOfX = ((int32_t)(t0); 3953 SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); 3954 3955 // FractionalPartOfX = t0 - (float)IntegerPartOfX; 3956 SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX); 3957 SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1); 3958 3959 // IntegerPartOfX <<= 23; 3960 IntegerPartOfX = DAG.getNode( 3961 ISD::SHL, dl, MVT::i32, IntegerPartOfX, 3962 DAG.getConstant(23, DAG.getTargetLoweringInfo().getPointerTy())); 3963 3964 SDValue TwoToFractionalPartOfX; 3965 if (LimitFloatPrecision <= 6) { 3966 // For floating-point precision of 6: 3967 // 3968 // TwoToFractionalPartOfX = 3969 // 0.997535578f + 3970 // (0.735607626f + 0.252464424f * x) * x; 3971 // 3972 // error 0.0144103317, which is 6 bits 3973 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3974 getF32Constant(DAG, 0x3e814304)); 3975 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 3976 getF32Constant(DAG, 0x3f3c50c8)); 3977 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3978 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3979 getF32Constant(DAG, 0x3f7f5e7e)); 3980 } else if (LimitFloatPrecision <= 12) { 3981 // For floating-point precision of 12: 3982 // 3983 // TwoToFractionalPartOfX = 3984 // 0.999892986f + 3985 // (0.696457318f + 3986 // (0.224338339f + 0.792043434e-1f * x) * x) * x; 3987 // 3988 // error 0.000107046256, which is 13 to 14 bits 3989 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 3990 getF32Constant(DAG, 0x3da235e3)); 3991 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 3992 getF32Constant(DAG, 0x3e65b8f3)); 3993 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 3994 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 3995 getF32Constant(DAG, 0x3f324b07)); 3996 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 3997 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 3998 getF32Constant(DAG, 0x3f7ff8fd)); 3999 } else { // LimitFloatPrecision <= 18 4000 // For floating-point precision of 18: 4001 // 4002 // TwoToFractionalPartOfX = 4003 // 0.999999982f + 4004 // (0.693148872f + 4005 // (0.240227044f + 4006 // (0.554906021e-1f + 4007 // (0.961591928e-2f + 4008 // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x; 4009 // error 2.47208000*10^(-7), which is better than 18 bits 4010 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4011 getF32Constant(DAG, 0x3924b03e)); 4012 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4013 getF32Constant(DAG, 0x3ab24b87)); 4014 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4015 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4016 getF32Constant(DAG, 0x3c1d8c17)); 4017 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4018 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 4019 getF32Constant(DAG, 0x3d634a1d)); 4020 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4021 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 4022 getF32Constant(DAG, 0x3e75fe14)); 4023 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 4024 SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, 4025 getF32Constant(DAG, 0x3f317234)); 4026 SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); 4027 TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, 4028 getF32Constant(DAG, 0x3f800000)); 4029 } 4030 4031 // Add the exponent into the result in integer domain. 4032 SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX); 4033 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 4034 DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX)); 4035 } 4036 4037 /// expandExp - Lower an exp intrinsic. Handles the special sequences for 4038 /// limited-precision mode. 4039 static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4040 const TargetLowering &TLI) { 4041 if (Op.getValueType() == MVT::f32 && 4042 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4043 4044 // Put the exponent in the right bit position for later addition to the 4045 // final result: 4046 // 4047 // #define LOG2OFe 1.4426950f 4048 // t0 = Op * LOG2OFe 4049 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, 4050 getF32Constant(DAG, 0x3fb8aa3b)); 4051 return getLimitedPrecisionExp2(t0, dl, DAG); 4052 } 4053 4054 // No special expansion. 4055 return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); 4056 } 4057 4058 /// expandLog - Lower a log intrinsic. Handles the special sequences for 4059 /// limited-precision mode. 4060 static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4061 const TargetLowering &TLI) { 4062 if (Op.getValueType() == MVT::f32 && 4063 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4064 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 4065 4066 // Scale the exponent by log(2) [0.69314718f]. 4067 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 4068 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 4069 getF32Constant(DAG, 0x3f317218)); 4070 4071 // Get the significand and build it into a floating-point number with 4072 // exponent of 1. 4073 SDValue X = GetSignificand(DAG, Op1, dl); 4074 4075 SDValue LogOfMantissa; 4076 if (LimitFloatPrecision <= 6) { 4077 // For floating-point precision of 6: 4078 // 4079 // LogofMantissa = 4080 // -1.1609546f + 4081 // (1.4034025f - 0.23903021f * x) * x; 4082 // 4083 // error 0.0034276066, which is better than 8 bits 4084 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4085 getF32Constant(DAG, 0xbe74c456)); 4086 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4087 getF32Constant(DAG, 0x3fb3a2b1)); 4088 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4089 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4090 getF32Constant(DAG, 0x3f949a29)); 4091 } else if (LimitFloatPrecision <= 12) { 4092 // For floating-point precision of 12: 4093 // 4094 // LogOfMantissa = 4095 // -1.7417939f + 4096 // (2.8212026f + 4097 // (-1.4699568f + 4098 // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x; 4099 // 4100 // error 0.000061011436, which is 14 bits 4101 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4102 getF32Constant(DAG, 0xbd67b6d6)); 4103 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4104 getF32Constant(DAG, 0x3ee4f4b8)); 4105 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4106 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4107 getF32Constant(DAG, 0x3fbc278b)); 4108 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4109 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4110 getF32Constant(DAG, 0x40348e95)); 4111 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4112 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 4113 getF32Constant(DAG, 0x3fdef31a)); 4114 } else { // LimitFloatPrecision <= 18 4115 // For floating-point precision of 18: 4116 // 4117 // LogOfMantissa = 4118 // -2.1072184f + 4119 // (4.2372794f + 4120 // (-3.7029485f + 4121 // (2.2781945f + 4122 // (-0.87823314f + 4123 // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x; 4124 // 4125 // error 0.0000023660568, which is better than 18 bits 4126 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4127 getF32Constant(DAG, 0xbc91e5ac)); 4128 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4129 getF32Constant(DAG, 0x3e4350aa)); 4130 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4131 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4132 getF32Constant(DAG, 0x3f60d3e3)); 4133 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4134 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4135 getF32Constant(DAG, 0x4011cdf0)); 4136 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4137 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 4138 getF32Constant(DAG, 0x406cfd1c)); 4139 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4140 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 4141 getF32Constant(DAG, 0x408797cb)); 4142 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 4143 LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, 4144 getF32Constant(DAG, 0x4006dcab)); 4145 } 4146 4147 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); 4148 } 4149 4150 // No special expansion. 4151 return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); 4152 } 4153 4154 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for 4155 /// limited-precision mode. 4156 static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4157 const TargetLowering &TLI) { 4158 if (Op.getValueType() == MVT::f32 && 4159 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4160 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 4161 4162 // Get the exponent. 4163 SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl); 4164 4165 // Get the significand and build it into a floating-point number with 4166 // exponent of 1. 4167 SDValue X = GetSignificand(DAG, Op1, dl); 4168 4169 // Different possible minimax approximations of significand in 4170 // floating-point for various degrees of accuracy over [1,2]. 4171 SDValue Log2ofMantissa; 4172 if (LimitFloatPrecision <= 6) { 4173 // For floating-point precision of 6: 4174 // 4175 // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x; 4176 // 4177 // error 0.0049451742, which is more than 7 bits 4178 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4179 getF32Constant(DAG, 0xbeb08fe0)); 4180 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4181 getF32Constant(DAG, 0x40019463)); 4182 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4183 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4184 getF32Constant(DAG, 0x3fd6633d)); 4185 } else if (LimitFloatPrecision <= 12) { 4186 // For floating-point precision of 12: 4187 // 4188 // Log2ofMantissa = 4189 // -2.51285454f + 4190 // (4.07009056f + 4191 // (-2.12067489f + 4192 // (.645142248f - 0.816157886e-1f * x) * x) * x) * x; 4193 // 4194 // error 0.0000876136000, which is better than 13 bits 4195 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4196 getF32Constant(DAG, 0xbda7262e)); 4197 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4198 getF32Constant(DAG, 0x3f25280b)); 4199 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4200 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4201 getF32Constant(DAG, 0x4007b923)); 4202 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4203 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4204 getF32Constant(DAG, 0x40823e2f)); 4205 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4206 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 4207 getF32Constant(DAG, 0x4020d29c)); 4208 } else { // LimitFloatPrecision <= 18 4209 // For floating-point precision of 18: 4210 // 4211 // Log2ofMantissa = 4212 // -3.0400495f + 4213 // (6.1129976f + 4214 // (-5.3420409f + 4215 // (3.2865683f + 4216 // (-1.2669343f + 4217 // (0.27515199f - 4218 // 0.25691327e-1f * x) * x) * x) * x) * x) * x; 4219 // 4220 // error 0.0000018516, which is better than 18 bits 4221 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4222 getF32Constant(DAG, 0xbcd2769e)); 4223 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4224 getF32Constant(DAG, 0x3e8ce0b9)); 4225 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4226 SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4227 getF32Constant(DAG, 0x3fa22ae7)); 4228 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4229 SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, 4230 getF32Constant(DAG, 0x40525723)); 4231 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4232 SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, 4233 getF32Constant(DAG, 0x40aaf200)); 4234 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4235 SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, 4236 getF32Constant(DAG, 0x40c39dad)); 4237 SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); 4238 Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, 4239 getF32Constant(DAG, 0x4042902c)); 4240 } 4241 4242 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); 4243 } 4244 4245 // No special expansion. 4246 return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); 4247 } 4248 4249 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for 4250 /// limited-precision mode. 4251 static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4252 const TargetLowering &TLI) { 4253 if (Op.getValueType() == MVT::f32 && 4254 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4255 SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 4256 4257 // Scale the exponent by log10(2) [0.30102999f]. 4258 SDValue Exp = GetExponent(DAG, Op1, TLI, dl); 4259 SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp, 4260 getF32Constant(DAG, 0x3e9a209a)); 4261 4262 // Get the significand and build it into a floating-point number with 4263 // exponent of 1. 4264 SDValue X = GetSignificand(DAG, Op1, dl); 4265 4266 SDValue Log10ofMantissa; 4267 if (LimitFloatPrecision <= 6) { 4268 // For floating-point precision of 6: 4269 // 4270 // Log10ofMantissa = 4271 // -0.50419619f + 4272 // (0.60948995f - 0.10380950f * x) * x; 4273 // 4274 // error 0.0014886165, which is 6 bits 4275 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4276 getF32Constant(DAG, 0xbdd49a13)); 4277 SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, 4278 getF32Constant(DAG, 0x3f1c0789)); 4279 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4280 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, 4281 getF32Constant(DAG, 0x3f011300)); 4282 } else if (LimitFloatPrecision <= 12) { 4283 // For floating-point precision of 12: 4284 // 4285 // Log10ofMantissa = 4286 // -0.64831180f + 4287 // (0.91751397f + 4288 // (-0.31664806f + 0.47637168e-1f * x) * x) * x; 4289 // 4290 // error 0.00019228036, which is better than 12 bits 4291 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4292 getF32Constant(DAG, 0x3d431f31)); 4293 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, 4294 getF32Constant(DAG, 0x3ea21fb2)); 4295 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4296 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4297 getF32Constant(DAG, 0x3f6ae232)); 4298 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4299 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, 4300 getF32Constant(DAG, 0x3f25f7c3)); 4301 } else { // LimitFloatPrecision <= 18 4302 // For floating-point precision of 18: 4303 // 4304 // Log10ofMantissa = 4305 // -0.84299375f + 4306 // (1.5327582f + 4307 // (-1.0688956f + 4308 // (0.49102474f + 4309 // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x; 4310 // 4311 // error 0.0000037995730, which is better than 18 bits 4312 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X, 4313 getF32Constant(DAG, 0x3c5d51ce)); 4314 SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, 4315 getF32Constant(DAG, 0x3e00685a)); 4316 SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); 4317 SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, 4318 getF32Constant(DAG, 0x3efb6798)); 4319 SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); 4320 SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, 4321 getF32Constant(DAG, 0x3f88d192)); 4322 SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); 4323 SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, 4324 getF32Constant(DAG, 0x3fc4316c)); 4325 SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); 4326 Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, 4327 getF32Constant(DAG, 0x3f57ce70)); 4328 } 4329 4330 return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); 4331 } 4332 4333 // No special expansion. 4334 return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); 4335 } 4336 4337 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for 4338 /// limited-precision mode. 4339 static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, 4340 const TargetLowering &TLI) { 4341 if (Op.getValueType() == MVT::f32 && 4342 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) 4343 return getLimitedPrecisionExp2(Op, dl, DAG); 4344 4345 // No special expansion. 4346 return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); 4347 } 4348 4349 /// visitPow - Lower a pow intrinsic. Handles the special sequences for 4350 /// limited-precision mode with x == 10.0f. 4351 static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, 4352 SelectionDAG &DAG, const TargetLowering &TLI) { 4353 bool IsExp10 = false; 4354 if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && 4355 LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { 4356 if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) { 4357 APFloat Ten(10.0f); 4358 IsExp10 = LHSC->isExactlyValue(Ten); 4359 } 4360 } 4361 4362 if (IsExp10) { 4363 // Put the exponent in the right bit position for later addition to the 4364 // final result: 4365 // 4366 // #define LOG2OF10 3.3219281f 4367 // t0 = Op * LOG2OF10; 4368 SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, 4369 getF32Constant(DAG, 0x40549a78)); 4370 return getLimitedPrecisionExp2(t0, dl, DAG); 4371 } 4372 4373 // No special expansion. 4374 return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); 4375 } 4376 4377 4378 /// ExpandPowI - Expand a llvm.powi intrinsic. 4379 static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, 4380 SelectionDAG &DAG) { 4381 // If RHS is a constant, we can expand this out to a multiplication tree, 4382 // otherwise we end up lowering to a call to __powidf2 (for example). When 4383 // optimizing for size, we only want to do this if the expansion would produce 4384 // a small number of multiplies, otherwise we do the full expansion. 4385 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { 4386 // Get the exponent as a positive value. 4387 unsigned Val = RHSC->getSExtValue(); 4388 if ((int)Val < 0) Val = -Val; 4389 4390 // powi(x, 0) -> 1.0 4391 if (Val == 0) 4392 return DAG.getConstantFP(1.0, LHS.getValueType()); 4393 4394 const Function *F = DAG.getMachineFunction().getFunction(); 4395 if (!F->hasFnAttribute(Attribute::OptimizeForSize) || 4396 // If optimizing for size, don't insert too many multiplies. This 4397 // inserts up to 5 multiplies. 4398 countPopulation(Val) + Log2_32(Val) < 7) { 4399 // We use the simple binary decomposition method to generate the multiply 4400 // sequence. There are more optimal ways to do this (for example, 4401 // powi(x,15) generates one more multiply than it should), but this has 4402 // the benefit of being both really simple and much better than a libcall. 4403 SDValue Res; // Logically starts equal to 1.0 4404 SDValue CurSquare = LHS; 4405 while (Val) { 4406 if (Val & 1) { 4407 if (Res.getNode()) 4408 Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare); 4409 else 4410 Res = CurSquare; // 1.0*CurSquare. 4411 } 4412 4413 CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(), 4414 CurSquare, CurSquare); 4415 Val >>= 1; 4416 } 4417 4418 // If the original was negative, invert the result, producing 1/(x*x*x). 4419 if (RHSC->getSExtValue() < 0) 4420 Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(), 4421 DAG.getConstantFP(1.0, LHS.getValueType()), Res); 4422 return Res; 4423 } 4424 } 4425 4426 // Otherwise, expand to a libcall. 4427 return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS); 4428 } 4429 4430 // getTruncatedArgReg - Find underlying register used for an truncated 4431 // argument. 4432 static unsigned getTruncatedArgReg(const SDValue &N) { 4433 if (N.getOpcode() != ISD::TRUNCATE) 4434 return 0; 4435 4436 const SDValue &Ext = N.getOperand(0); 4437 if (Ext.getOpcode() == ISD::AssertZext || 4438 Ext.getOpcode() == ISD::AssertSext) { 4439 const SDValue &CFR = Ext.getOperand(0); 4440 if (CFR.getOpcode() == ISD::CopyFromReg) 4441 return cast<RegisterSDNode>(CFR.getOperand(1))->getReg(); 4442 if (CFR.getOpcode() == ISD::TRUNCATE) 4443 return getTruncatedArgReg(CFR); 4444 } 4445 return 0; 4446 } 4447 4448 /// EmitFuncArgumentDbgValue - If the DbgValueInst is a dbg_value of a function 4449 /// argument, create the corresponding DBG_VALUE machine instruction for it now. 4450 /// At the end of instruction selection, they will be inserted to the entry BB. 4451 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue( 4452 const Value *V, MDLocalVariable *Variable, MDExpression *Expr, 4453 int64_t Offset, bool IsIndirect, const SDValue &N) { 4454 const Argument *Arg = dyn_cast<Argument>(V); 4455 if (!Arg) 4456 return false; 4457 4458 MachineFunction &MF = DAG.getMachineFunction(); 4459 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); 4460 4461 // Ignore inlined function arguments here. 4462 DIVariable DV(Variable); 4463 if (DV.isInlinedFnArgument(MF.getFunction())) 4464 return false; 4465 4466 Optional<MachineOperand> Op; 4467 // Some arguments' frame index is recorded during argument lowering. 4468 if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) 4469 Op = MachineOperand::CreateFI(FI); 4470 4471 if (!Op && N.getNode()) { 4472 unsigned Reg; 4473 if (N.getOpcode() == ISD::CopyFromReg) 4474 Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg(); 4475 else 4476 Reg = getTruncatedArgReg(N); 4477 if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) { 4478 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 4479 unsigned PR = RegInfo.getLiveInPhysReg(Reg); 4480 if (PR) 4481 Reg = PR; 4482 } 4483 if (Reg) 4484 Op = MachineOperand::CreateReg(Reg, false); 4485 } 4486 4487 if (!Op) { 4488 // Check if ValueMap has reg number. 4489 DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V); 4490 if (VMI != FuncInfo.ValueMap.end()) 4491 Op = MachineOperand::CreateReg(VMI->second, false); 4492 } 4493 4494 if (!Op && N.getNode()) 4495 // Check if frame index is available. 4496 if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode())) 4497 if (FrameIndexSDNode *FINode = 4498 dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) 4499 Op = MachineOperand::CreateFI(FINode->getIndex()); 4500 4501 if (!Op) 4502 return false; 4503 4504 if (Op->isReg()) 4505 FuncInfo.ArgDbgValues.push_back( 4506 BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE), 4507 IsIndirect, Op->getReg(), Offset, Variable, Expr)); 4508 else 4509 FuncInfo.ArgDbgValues.push_back( 4510 BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) 4511 .addOperand(*Op) 4512 .addImm(Offset) 4513 .addMetadata(Variable) 4514 .addMetadata(Expr)); 4515 4516 return true; 4517 } 4518 4519 // VisualStudio defines setjmp as _setjmp 4520 #if defined(_MSC_VER) && defined(setjmp) && \ 4521 !defined(setjmp_undefined_for_msvc) 4522 # pragma push_macro("setjmp") 4523 # undef setjmp 4524 # define setjmp_undefined_for_msvc 4525 #endif 4526 4527 /// visitIntrinsicCall - Lower the call to the specified intrinsic function. If 4528 /// we want to emit this as a call to a named external function, return the name 4529 /// otherwise lower it and return null. 4530 const char * 4531 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { 4532 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4533 SDLoc sdl = getCurSDLoc(); 4534 DebugLoc dl = getCurDebugLoc(); 4535 SDValue Res; 4536 4537 switch (Intrinsic) { 4538 default: 4539 // By default, turn this into a target intrinsic node. 4540 visitTargetIntrinsic(I, Intrinsic); 4541 return nullptr; 4542 case Intrinsic::vastart: visitVAStart(I); return nullptr; 4543 case Intrinsic::vaend: visitVAEnd(I); return nullptr; 4544 case Intrinsic::vacopy: visitVACopy(I); return nullptr; 4545 case Intrinsic::returnaddress: 4546 setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI.getPointerTy(), 4547 getValue(I.getArgOperand(0)))); 4548 return nullptr; 4549 case Intrinsic::frameaddress: 4550 setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), 4551 getValue(I.getArgOperand(0)))); 4552 return nullptr; 4553 case Intrinsic::read_register: { 4554 Value *Reg = I.getArgOperand(0); 4555 SDValue RegName = 4556 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); 4557 EVT VT = TLI.getValueType(I.getType()); 4558 setValue(&I, DAG.getNode(ISD::READ_REGISTER, sdl, VT, RegName)); 4559 return nullptr; 4560 } 4561 case Intrinsic::write_register: { 4562 Value *Reg = I.getArgOperand(0); 4563 Value *RegValue = I.getArgOperand(1); 4564 SDValue Chain = getValue(RegValue).getOperand(0); 4565 SDValue RegName = 4566 DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata())); 4567 DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain, 4568 RegName, getValue(RegValue))); 4569 return nullptr; 4570 } 4571 case Intrinsic::setjmp: 4572 return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; 4573 case Intrinsic::longjmp: 4574 return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; 4575 case Intrinsic::memcpy: { 4576 // FIXME: this definition of "user defined address space" is x86-specific 4577 // Assert for address < 256 since we support only user defined address 4578 // spaces. 4579 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 4580 < 256 && 4581 cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() 4582 < 256 && 4583 "Unknown address space"); 4584 SDValue Op1 = getValue(I.getArgOperand(0)); 4585 SDValue Op2 = getValue(I.getArgOperand(1)); 4586 SDValue Op3 = getValue(I.getArgOperand(2)); 4587 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4588 if (!Align) 4589 Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. 4590 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4591 DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, 4592 MachinePointerInfo(I.getArgOperand(0)), 4593 MachinePointerInfo(I.getArgOperand(1)))); 4594 return nullptr; 4595 } 4596 case Intrinsic::memset: { 4597 // FIXME: this definition of "user defined address space" is x86-specific 4598 // Assert for address < 256 since we support only user defined address 4599 // spaces. 4600 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 4601 < 256 && 4602 "Unknown address space"); 4603 SDValue Op1 = getValue(I.getArgOperand(0)); 4604 SDValue Op2 = getValue(I.getArgOperand(1)); 4605 SDValue Op3 = getValue(I.getArgOperand(2)); 4606 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4607 if (!Align) 4608 Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. 4609 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4610 DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, 4611 MachinePointerInfo(I.getArgOperand(0)))); 4612 return nullptr; 4613 } 4614 case Intrinsic::memmove: { 4615 // FIXME: this definition of "user defined address space" is x86-specific 4616 // Assert for address < 256 since we support only user defined address 4617 // spaces. 4618 assert(cast<PointerType>(I.getArgOperand(0)->getType())->getAddressSpace() 4619 < 256 && 4620 cast<PointerType>(I.getArgOperand(1)->getType())->getAddressSpace() 4621 < 256 && 4622 "Unknown address space"); 4623 SDValue Op1 = getValue(I.getArgOperand(0)); 4624 SDValue Op2 = getValue(I.getArgOperand(1)); 4625 SDValue Op3 = getValue(I.getArgOperand(2)); 4626 unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue(); 4627 if (!Align) 4628 Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. 4629 bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue(); 4630 DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, 4631 MachinePointerInfo(I.getArgOperand(0)), 4632 MachinePointerInfo(I.getArgOperand(1)))); 4633 return nullptr; 4634 } 4635 case Intrinsic::dbg_declare: { 4636 const DbgDeclareInst &DI = cast<DbgDeclareInst>(I); 4637 MDLocalVariable *Variable = DI.getVariable(); 4638 MDExpression *Expression = DI.getExpression(); 4639 const Value *Address = DI.getAddress(); 4640 DIVariable DIVar(Variable); 4641 assert((!DIVar || DIVar.isVariable()) && 4642 "Variable in DbgDeclareInst should be either null or a DIVariable."); 4643 if (!Address || !DIVar) { 4644 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4645 return nullptr; 4646 } 4647 4648 // Check if address has undef value. 4649 if (isa<UndefValue>(Address) || 4650 (Address->use_empty() && !isa<Argument>(Address))) { 4651 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4652 return nullptr; 4653 } 4654 4655 SDValue &N = NodeMap[Address]; 4656 if (!N.getNode() && isa<Argument>(Address)) 4657 // Check unused arguments map. 4658 N = UnusedArgNodeMap[Address]; 4659 SDDbgValue *SDV; 4660 if (N.getNode()) { 4661 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address)) 4662 Address = BCI->getOperand(0); 4663 // Parameters are handled specially. 4664 bool isParameter = 4665 (DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable || 4666 isa<Argument>(Address)); 4667 4668 const AllocaInst *AI = dyn_cast<AllocaInst>(Address); 4669 4670 if (isParameter && !AI) { 4671 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(N.getNode()); 4672 if (FINode) 4673 // Byval parameter. We have a frame index at this point. 4674 SDV = DAG.getFrameIndexDbgValue( 4675 Variable, Expression, FINode->getIndex(), 0, dl, SDNodeOrder); 4676 else { 4677 // Address is an argument, so try to emit its dbg value using 4678 // virtual register info from the FuncInfo.ValueMap. 4679 EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, N); 4680 return nullptr; 4681 } 4682 } else if (AI) 4683 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), 4684 true, 0, dl, SDNodeOrder); 4685 else { 4686 // Can't do anything with other non-AI cases yet. 4687 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4688 DEBUG(dbgs() << "non-AllocaInst issue for Address: \n\t"); 4689 DEBUG(Address->dump()); 4690 return nullptr; 4691 } 4692 DAG.AddDbgValue(SDV, N.getNode(), isParameter); 4693 } else { 4694 // If Address is an argument then try to emit its dbg value using 4695 // virtual register info from the FuncInfo.ValueMap. 4696 if (!EmitFuncArgumentDbgValue(Address, Variable, Expression, 0, false, 4697 N)) { 4698 // If variable is pinned by a alloca in dominating bb then 4699 // use StaticAllocaMap. 4700 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) { 4701 if (AI->getParent() != DI.getParent()) { 4702 DenseMap<const AllocaInst*, int>::iterator SI = 4703 FuncInfo.StaticAllocaMap.find(AI); 4704 if (SI != FuncInfo.StaticAllocaMap.end()) { 4705 SDV = DAG.getFrameIndexDbgValue(Variable, Expression, SI->second, 4706 0, dl, SDNodeOrder); 4707 DAG.AddDbgValue(SDV, nullptr, false); 4708 return nullptr; 4709 } 4710 } 4711 } 4712 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4713 } 4714 } 4715 return nullptr; 4716 } 4717 case Intrinsic::dbg_value: { 4718 const DbgValueInst &DI = cast<DbgValueInst>(I); 4719 DIVariable DIVar(DI.getVariable()); 4720 assert((!DIVar || DIVar.isVariable()) && 4721 "Variable in DbgValueInst should be either null or a DIVariable."); 4722 if (!DIVar) 4723 return nullptr; 4724 4725 MDLocalVariable *Variable = DI.getVariable(); 4726 MDExpression *Expression = DI.getExpression(); 4727 uint64_t Offset = DI.getOffset(); 4728 const Value *V = DI.getValue(); 4729 if (!V) 4730 return nullptr; 4731 4732 SDDbgValue *SDV; 4733 if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V)) { 4734 SDV = DAG.getConstantDbgValue(Variable, Expression, V, Offset, dl, 4735 SDNodeOrder); 4736 DAG.AddDbgValue(SDV, nullptr, false); 4737 } else { 4738 // Do not use getValue() in here; we don't want to generate code at 4739 // this point if it hasn't been done yet. 4740 SDValue N = NodeMap[V]; 4741 if (!N.getNode() && isa<Argument>(V)) 4742 // Check unused arguments map. 4743 N = UnusedArgNodeMap[V]; 4744 if (N.getNode()) { 4745 // A dbg.value for an alloca is always indirect. 4746 bool IsIndirect = isa<AllocaInst>(V) || Offset != 0; 4747 if (!EmitFuncArgumentDbgValue(V, Variable, Expression, Offset, 4748 IsIndirect, N)) { 4749 SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(), 4750 IsIndirect, Offset, dl, SDNodeOrder); 4751 DAG.AddDbgValue(SDV, N.getNode(), false); 4752 } 4753 } else if (!V->use_empty() ) { 4754 // Do not call getValue(V) yet, as we don't want to generate code. 4755 // Remember it for later. 4756 DanglingDebugInfo DDI(&DI, dl, SDNodeOrder); 4757 DanglingDebugInfoMap[V] = DDI; 4758 } else { 4759 // We may expand this to cover more cases. One case where we have no 4760 // data available is an unreferenced parameter. 4761 DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); 4762 } 4763 } 4764 4765 // Build a debug info table entry. 4766 if (const BitCastInst *BCI = dyn_cast<BitCastInst>(V)) 4767 V = BCI->getOperand(0); 4768 const AllocaInst *AI = dyn_cast<AllocaInst>(V); 4769 // Don't handle byval struct arguments or VLAs, for example. 4770 if (!AI) { 4771 DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n"); 4772 DEBUG(dbgs() << " Last seen at:\n " << *V << "\n"); 4773 return nullptr; 4774 } 4775 DenseMap<const AllocaInst*, int>::iterator SI = 4776 FuncInfo.StaticAllocaMap.find(AI); 4777 if (SI == FuncInfo.StaticAllocaMap.end()) 4778 return nullptr; // VLAs. 4779 return nullptr; 4780 } 4781 4782 case Intrinsic::eh_typeid_for: { 4783 // Find the type id for the given typeinfo. 4784 GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0)); 4785 unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); 4786 Res = DAG.getConstant(TypeID, MVT::i32); 4787 setValue(&I, Res); 4788 return nullptr; 4789 } 4790 4791 case Intrinsic::eh_return_i32: 4792 case Intrinsic::eh_return_i64: 4793 DAG.getMachineFunction().getMMI().setCallsEHReturn(true); 4794 DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, 4795 MVT::Other, 4796 getControlRoot(), 4797 getValue(I.getArgOperand(0)), 4798 getValue(I.getArgOperand(1)))); 4799 return nullptr; 4800 case Intrinsic::eh_unwind_init: 4801 DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); 4802 return nullptr; 4803 case Intrinsic::eh_dwarf_cfa: { 4804 SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, 4805 TLI.getPointerTy()); 4806 SDValue Offset = DAG.getNode(ISD::ADD, sdl, 4807 CfaArg.getValueType(), 4808 DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, 4809 CfaArg.getValueType()), 4810 CfaArg); 4811 SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, TLI.getPointerTy(), 4812 DAG.getConstant(0, TLI.getPointerTy())); 4813 setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), 4814 FA, Offset)); 4815 return nullptr; 4816 } 4817 case Intrinsic::eh_sjlj_callsite: { 4818 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 4819 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0)); 4820 assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); 4821 assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); 4822 4823 MMI.setCurrentCallSite(CI->getZExtValue()); 4824 return nullptr; 4825 } 4826 case Intrinsic::eh_sjlj_functioncontext: { 4827 // Get and store the index of the function context. 4828 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 4829 AllocaInst *FnCtx = 4830 cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); 4831 int FI = FuncInfo.StaticAllocaMap[FnCtx]; 4832 MFI->setFunctionContextIndex(FI); 4833 return nullptr; 4834 } 4835 case Intrinsic::eh_sjlj_setjmp: { 4836 SDValue Ops[2]; 4837 Ops[0] = getRoot(); 4838 Ops[1] = getValue(I.getArgOperand(0)); 4839 SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, 4840 DAG.getVTList(MVT::i32, MVT::Other), Ops); 4841 setValue(&I, Op.getValue(0)); 4842 DAG.setRoot(Op.getValue(1)); 4843 return nullptr; 4844 } 4845 case Intrinsic::eh_sjlj_longjmp: { 4846 DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, 4847 getRoot(), getValue(I.getArgOperand(0)))); 4848 return nullptr; 4849 } 4850 4851 case Intrinsic::masked_load: 4852 visitMaskedLoad(I); 4853 return nullptr; 4854 case Intrinsic::masked_store: 4855 visitMaskedStore(I); 4856 return nullptr; 4857 case Intrinsic::x86_mmx_pslli_w: 4858 case Intrinsic::x86_mmx_pslli_d: 4859 case Intrinsic::x86_mmx_pslli_q: 4860 case Intrinsic::x86_mmx_psrli_w: 4861 case Intrinsic::x86_mmx_psrli_d: 4862 case Intrinsic::x86_mmx_psrli_q: 4863 case Intrinsic::x86_mmx_psrai_w: 4864 case Intrinsic::x86_mmx_psrai_d: { 4865 SDValue ShAmt = getValue(I.getArgOperand(1)); 4866 if (isa<ConstantSDNode>(ShAmt)) { 4867 visitTargetIntrinsic(I, Intrinsic); 4868 return nullptr; 4869 } 4870 unsigned NewIntrinsic = 0; 4871 EVT ShAmtVT = MVT::v2i32; 4872 switch (Intrinsic) { 4873 case Intrinsic::x86_mmx_pslli_w: 4874 NewIntrinsic = Intrinsic::x86_mmx_psll_w; 4875 break; 4876 case Intrinsic::x86_mmx_pslli_d: 4877 NewIntrinsic = Intrinsic::x86_mmx_psll_d; 4878 break; 4879 case Intrinsic::x86_mmx_pslli_q: 4880 NewIntrinsic = Intrinsic::x86_mmx_psll_q; 4881 break; 4882 case Intrinsic::x86_mmx_psrli_w: 4883 NewIntrinsic = Intrinsic::x86_mmx_psrl_w; 4884 break; 4885 case Intrinsic::x86_mmx_psrli_d: 4886 NewIntrinsic = Intrinsic::x86_mmx_psrl_d; 4887 break; 4888 case Intrinsic::x86_mmx_psrli_q: 4889 NewIntrinsic = Intrinsic::x86_mmx_psrl_q; 4890 break; 4891 case Intrinsic::x86_mmx_psrai_w: 4892 NewIntrinsic = Intrinsic::x86_mmx_psra_w; 4893 break; 4894 case Intrinsic::x86_mmx_psrai_d: 4895 NewIntrinsic = Intrinsic::x86_mmx_psra_d; 4896 break; 4897 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 4898 } 4899 4900 // The vector shift intrinsics with scalars uses 32b shift amounts but 4901 // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits 4902 // to be zero. 4903 // We must do this early because v2i32 is not a legal type. 4904 SDValue ShOps[2]; 4905 ShOps[0] = ShAmt; 4906 ShOps[1] = DAG.getConstant(0, MVT::i32); 4907 ShAmt = DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, ShOps); 4908 EVT DestVT = TLI.getValueType(I.getType()); 4909 ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); 4910 Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, 4911 DAG.getConstant(NewIntrinsic, MVT::i32), 4912 getValue(I.getArgOperand(0)), ShAmt); 4913 setValue(&I, Res); 4914 return nullptr; 4915 } 4916 case Intrinsic::convertff: 4917 case Intrinsic::convertfsi: 4918 case Intrinsic::convertfui: 4919 case Intrinsic::convertsif: 4920 case Intrinsic::convertuif: 4921 case Intrinsic::convertss: 4922 case Intrinsic::convertsu: 4923 case Intrinsic::convertus: 4924 case Intrinsic::convertuu: { 4925 ISD::CvtCode Code = ISD::CVT_INVALID; 4926 switch (Intrinsic) { 4927 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 4928 case Intrinsic::convertff: Code = ISD::CVT_FF; break; 4929 case Intrinsic::convertfsi: Code = ISD::CVT_FS; break; 4930 case Intrinsic::convertfui: Code = ISD::CVT_FU; break; 4931 case Intrinsic::convertsif: Code = ISD::CVT_SF; break; 4932 case Intrinsic::convertuif: Code = ISD::CVT_UF; break; 4933 case Intrinsic::convertss: Code = ISD::CVT_SS; break; 4934 case Intrinsic::convertsu: Code = ISD::CVT_SU; break; 4935 case Intrinsic::convertus: Code = ISD::CVT_US; break; 4936 case Intrinsic::convertuu: Code = ISD::CVT_UU; break; 4937 } 4938 EVT DestVT = TLI.getValueType(I.getType()); 4939 const Value *Op1 = I.getArgOperand(0); 4940 Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), 4941 DAG.getValueType(DestVT), 4942 DAG.getValueType(getValue(Op1).getValueType()), 4943 getValue(I.getArgOperand(1)), 4944 getValue(I.getArgOperand(2)), 4945 Code); 4946 setValue(&I, Res); 4947 return nullptr; 4948 } 4949 case Intrinsic::powi: 4950 setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), 4951 getValue(I.getArgOperand(1)), DAG)); 4952 return nullptr; 4953 case Intrinsic::log: 4954 setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); 4955 return nullptr; 4956 case Intrinsic::log2: 4957 setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); 4958 return nullptr; 4959 case Intrinsic::log10: 4960 setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); 4961 return nullptr; 4962 case Intrinsic::exp: 4963 setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); 4964 return nullptr; 4965 case Intrinsic::exp2: 4966 setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI)); 4967 return nullptr; 4968 case Intrinsic::pow: 4969 setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), 4970 getValue(I.getArgOperand(1)), DAG, TLI)); 4971 return nullptr; 4972 case Intrinsic::sqrt: 4973 case Intrinsic::fabs: 4974 case Intrinsic::sin: 4975 case Intrinsic::cos: 4976 case Intrinsic::floor: 4977 case Intrinsic::ceil: 4978 case Intrinsic::trunc: 4979 case Intrinsic::rint: 4980 case Intrinsic::nearbyint: 4981 case Intrinsic::round: { 4982 unsigned Opcode; 4983 switch (Intrinsic) { 4984 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 4985 case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; 4986 case Intrinsic::fabs: Opcode = ISD::FABS; break; 4987 case Intrinsic::sin: Opcode = ISD::FSIN; break; 4988 case Intrinsic::cos: Opcode = ISD::FCOS; break; 4989 case Intrinsic::floor: Opcode = ISD::FFLOOR; break; 4990 case Intrinsic::ceil: Opcode = ISD::FCEIL; break; 4991 case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; 4992 case Intrinsic::rint: Opcode = ISD::FRINT; break; 4993 case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; 4994 case Intrinsic::round: Opcode = ISD::FROUND; break; 4995 } 4996 4997 setValue(&I, DAG.getNode(Opcode, sdl, 4998 getValue(I.getArgOperand(0)).getValueType(), 4999 getValue(I.getArgOperand(0)))); 5000 return nullptr; 5001 } 5002 case Intrinsic::minnum: 5003 setValue(&I, DAG.getNode(ISD::FMINNUM, sdl, 5004 getValue(I.getArgOperand(0)).getValueType(), 5005 getValue(I.getArgOperand(0)), 5006 getValue(I.getArgOperand(1)))); 5007 return nullptr; 5008 case Intrinsic::maxnum: 5009 setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl, 5010 getValue(I.getArgOperand(0)).getValueType(), 5011 getValue(I.getArgOperand(0)), 5012 getValue(I.getArgOperand(1)))); 5013 return nullptr; 5014 case Intrinsic::copysign: 5015 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, 5016 getValue(I.getArgOperand(0)).getValueType(), 5017 getValue(I.getArgOperand(0)), 5018 getValue(I.getArgOperand(1)))); 5019 return nullptr; 5020 case Intrinsic::fma: 5021 setValue(&I, DAG.getNode(ISD::FMA, sdl, 5022 getValue(I.getArgOperand(0)).getValueType(), 5023 getValue(I.getArgOperand(0)), 5024 getValue(I.getArgOperand(1)), 5025 getValue(I.getArgOperand(2)))); 5026 return nullptr; 5027 case Intrinsic::fmuladd: { 5028 EVT VT = TLI.getValueType(I.getType()); 5029 if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && 5030 TLI.isFMAFasterThanFMulAndFAdd(VT)) { 5031 setValue(&I, DAG.getNode(ISD::FMA, sdl, 5032 getValue(I.getArgOperand(0)).getValueType(), 5033 getValue(I.getArgOperand(0)), 5034 getValue(I.getArgOperand(1)), 5035 getValue(I.getArgOperand(2)))); 5036 } else { 5037 SDValue Mul = DAG.getNode(ISD::FMUL, sdl, 5038 getValue(I.getArgOperand(0)).getValueType(), 5039 getValue(I.getArgOperand(0)), 5040 getValue(I.getArgOperand(1))); 5041 SDValue Add = DAG.getNode(ISD::FADD, sdl, 5042 getValue(I.getArgOperand(0)).getValueType(), 5043 Mul, 5044 getValue(I.getArgOperand(2))); 5045 setValue(&I, Add); 5046 } 5047 return nullptr; 5048 } 5049 case Intrinsic::convert_to_fp16: 5050 setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16, 5051 DAG.getNode(ISD::FP_ROUND, sdl, MVT::f16, 5052 getValue(I.getArgOperand(0)), 5053 DAG.getTargetConstant(0, MVT::i32)))); 5054 return nullptr; 5055 case Intrinsic::convert_from_fp16: 5056 setValue(&I, 5057 DAG.getNode(ISD::FP_EXTEND, sdl, TLI.getValueType(I.getType()), 5058 DAG.getNode(ISD::BITCAST, sdl, MVT::f16, 5059 getValue(I.getArgOperand(0))))); 5060 return nullptr; 5061 case Intrinsic::pcmarker: { 5062 SDValue Tmp = getValue(I.getArgOperand(0)); 5063 DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); 5064 return nullptr; 5065 } 5066 case Intrinsic::readcyclecounter: { 5067 SDValue Op = getRoot(); 5068 Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, 5069 DAG.getVTList(MVT::i64, MVT::Other), Op); 5070 setValue(&I, Res); 5071 DAG.setRoot(Res.getValue(1)); 5072 return nullptr; 5073 } 5074 case Intrinsic::bswap: 5075 setValue(&I, DAG.getNode(ISD::BSWAP, sdl, 5076 getValue(I.getArgOperand(0)).getValueType(), 5077 getValue(I.getArgOperand(0)))); 5078 return nullptr; 5079 case Intrinsic::cttz: { 5080 SDValue Arg = getValue(I.getArgOperand(0)); 5081 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 5082 EVT Ty = Arg.getValueType(); 5083 setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, 5084 sdl, Ty, Arg)); 5085 return nullptr; 5086 } 5087 case Intrinsic::ctlz: { 5088 SDValue Arg = getValue(I.getArgOperand(0)); 5089 ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1)); 5090 EVT Ty = Arg.getValueType(); 5091 setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, 5092 sdl, Ty, Arg)); 5093 return nullptr; 5094 } 5095 case Intrinsic::ctpop: { 5096 SDValue Arg = getValue(I.getArgOperand(0)); 5097 EVT Ty = Arg.getValueType(); 5098 setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); 5099 return nullptr; 5100 } 5101 case Intrinsic::stacksave: { 5102 SDValue Op = getRoot(); 5103 Res = DAG.getNode(ISD::STACKSAVE, sdl, 5104 DAG.getVTList(TLI.getPointerTy(), MVT::Other), Op); 5105 setValue(&I, Res); 5106 DAG.setRoot(Res.getValue(1)); 5107 return nullptr; 5108 } 5109 case Intrinsic::stackrestore: { 5110 Res = getValue(I.getArgOperand(0)); 5111 DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); 5112 return nullptr; 5113 } 5114 case Intrinsic::stackprotector: { 5115 // Emit code into the DAG to store the stack guard onto the stack. 5116 MachineFunction &MF = DAG.getMachineFunction(); 5117 MachineFrameInfo *MFI = MF.getFrameInfo(); 5118 EVT PtrTy = TLI.getPointerTy(); 5119 SDValue Src, Chain = getRoot(); 5120 const Value *Ptr = cast<LoadInst>(I.getArgOperand(0))->getPointerOperand(); 5121 const GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr); 5122 5123 // See if Ptr is a bitcast. If it is, look through it and see if we can get 5124 // global variable __stack_chk_guard. 5125 if (!GV) 5126 if (const Operator *BC = dyn_cast<Operator>(Ptr)) 5127 if (BC->getOpcode() == Instruction::BitCast) 5128 GV = dyn_cast<GlobalVariable>(BC->getOperand(0)); 5129 5130 if (GV && TLI.useLoadStackGuardNode()) { 5131 // Emit a LOAD_STACK_GUARD node. 5132 MachineSDNode *Node = DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, 5133 sdl, PtrTy, Chain); 5134 MachinePointerInfo MPInfo(GV); 5135 MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1); 5136 unsigned Flags = MachineMemOperand::MOLoad | 5137 MachineMemOperand::MOInvariant; 5138 *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, 5139 PtrTy.getSizeInBits() / 8, 5140 DAG.getEVTAlignment(PtrTy)); 5141 Node->setMemRefs(MemRefs, MemRefs + 1); 5142 5143 // Copy the guard value to a virtual register so that it can be 5144 // retrieved in the epilogue. 5145 Src = SDValue(Node, 0); 5146 const TargetRegisterClass *RC = 5147 TLI.getRegClassFor(Src.getSimpleValueType()); 5148 unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); 5149 5150 SPDescriptor.setGuardReg(Reg); 5151 Chain = DAG.getCopyToReg(Chain, sdl, Reg, Src); 5152 } else { 5153 Src = getValue(I.getArgOperand(0)); // The guard's value. 5154 } 5155 5156 AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1)); 5157 5158 int FI = FuncInfo.StaticAllocaMap[Slot]; 5159 MFI->setStackProtectorIndex(FI); 5160 5161 SDValue FIN = DAG.getFrameIndex(FI, PtrTy); 5162 5163 // Store the stack protector onto the stack. 5164 Res = DAG.getStore(Chain, sdl, Src, FIN, 5165 MachinePointerInfo::getFixedStack(FI), 5166 true, false, 0); 5167 setValue(&I, Res); 5168 DAG.setRoot(Res); 5169 return nullptr; 5170 } 5171 case Intrinsic::objectsize: { 5172 // If we don't know by now, we're never going to know. 5173 ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1)); 5174 5175 assert(CI && "Non-constant type in __builtin_object_size?"); 5176 5177 SDValue Arg = getValue(I.getCalledValue()); 5178 EVT Ty = Arg.getValueType(); 5179 5180 if (CI->isZero()) 5181 Res = DAG.getConstant(-1ULL, Ty); 5182 else 5183 Res = DAG.getConstant(0, Ty); 5184 5185 setValue(&I, Res); 5186 return nullptr; 5187 } 5188 case Intrinsic::annotation: 5189 case Intrinsic::ptr_annotation: 5190 // Drop the intrinsic, but forward the value 5191 setValue(&I, getValue(I.getOperand(0))); 5192 return nullptr; 5193 case Intrinsic::assume: 5194 case Intrinsic::var_annotation: 5195 // Discard annotate attributes and assumptions 5196 return nullptr; 5197 5198 case Intrinsic::init_trampoline: { 5199 const Function *F = cast<Function>(I.getArgOperand(1)->stripPointerCasts()); 5200 5201 SDValue Ops[6]; 5202 Ops[0] = getRoot(); 5203 Ops[1] = getValue(I.getArgOperand(0)); 5204 Ops[2] = getValue(I.getArgOperand(1)); 5205 Ops[3] = getValue(I.getArgOperand(2)); 5206 Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); 5207 Ops[5] = DAG.getSrcValue(F); 5208 5209 Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops); 5210 5211 DAG.setRoot(Res); 5212 return nullptr; 5213 } 5214 case Intrinsic::adjust_trampoline: { 5215 setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, 5216 TLI.getPointerTy(), 5217 getValue(I.getArgOperand(0)))); 5218 return nullptr; 5219 } 5220 case Intrinsic::gcroot: 5221 if (GFI) { 5222 const Value *Alloca = I.getArgOperand(0)->stripPointerCasts(); 5223 const Constant *TypeMap = cast<Constant>(I.getArgOperand(1)); 5224 5225 FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode()); 5226 GFI->addStackRoot(FI->getIndex(), TypeMap); 5227 } 5228 return nullptr; 5229 case Intrinsic::gcread: 5230 case Intrinsic::gcwrite: 5231 llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); 5232 case Intrinsic::flt_rounds: 5233 setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); 5234 return nullptr; 5235 5236 case Intrinsic::expect: { 5237 // Just replace __builtin_expect(exp, c) with EXP. 5238 setValue(&I, getValue(I.getArgOperand(0))); 5239 return nullptr; 5240 } 5241 5242 case Intrinsic::debugtrap: 5243 case Intrinsic::trap: { 5244 StringRef TrapFuncName = TM.Options.getTrapFunctionName(); 5245 if (TrapFuncName.empty()) { 5246 ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 5247 ISD::TRAP : ISD::DEBUGTRAP; 5248 DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); 5249 return nullptr; 5250 } 5251 TargetLowering::ArgListTy Args; 5252 5253 TargetLowering::CallLoweringInfo CLI(DAG); 5254 CLI.setDebugLoc(sdl).setChain(getRoot()) 5255 .setCallee(CallingConv::C, I.getType(), 5256 DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), 5257 std::move(Args), 0); 5258 5259 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); 5260 DAG.setRoot(Result.second); 5261 return nullptr; 5262 } 5263 5264 case Intrinsic::uadd_with_overflow: 5265 case Intrinsic::sadd_with_overflow: 5266 case Intrinsic::usub_with_overflow: 5267 case Intrinsic::ssub_with_overflow: 5268 case Intrinsic::umul_with_overflow: 5269 case Intrinsic::smul_with_overflow: { 5270 ISD::NodeType Op; 5271 switch (Intrinsic) { 5272 default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 5273 case Intrinsic::uadd_with_overflow: Op = ISD::UADDO; break; 5274 case Intrinsic::sadd_with_overflow: Op = ISD::SADDO; break; 5275 case Intrinsic::usub_with_overflow: Op = ISD::USUBO; break; 5276 case Intrinsic::ssub_with_overflow: Op = ISD::SSUBO; break; 5277 case Intrinsic::umul_with_overflow: Op = ISD::UMULO; break; 5278 case Intrinsic::smul_with_overflow: Op = ISD::SMULO; break; 5279 } 5280 SDValue Op1 = getValue(I.getArgOperand(0)); 5281 SDValue Op2 = getValue(I.getArgOperand(1)); 5282 5283 SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); 5284 setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); 5285 return nullptr; 5286 } 5287 case Intrinsic::prefetch: { 5288 SDValue Ops[5]; 5289 unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue(); 5290 Ops[0] = getRoot(); 5291 Ops[1] = getValue(I.getArgOperand(0)); 5292 Ops[2] = getValue(I.getArgOperand(1)); 5293 Ops[3] = getValue(I.getArgOperand(2)); 5294 Ops[4] = getValue(I.getArgOperand(3)); 5295 DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, 5296 DAG.getVTList(MVT::Other), Ops, 5297 EVT::getIntegerVT(*Context, 8), 5298 MachinePointerInfo(I.getArgOperand(0)), 5299 0, /* align */ 5300 false, /* volatile */ 5301 rw==0, /* read */ 5302 rw==1)); /* write */ 5303 return nullptr; 5304 } 5305 case Intrinsic::lifetime_start: 5306 case Intrinsic::lifetime_end: { 5307 bool IsStart = (Intrinsic == Intrinsic::lifetime_start); 5308 // Stack coloring is not enabled in O0, discard region information. 5309 if (TM.getOptLevel() == CodeGenOpt::None) 5310 return nullptr; 5311 5312 SmallVector<Value *, 4> Allocas; 5313 GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL); 5314 5315 for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(), 5316 E = Allocas.end(); Object != E; ++Object) { 5317 AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object); 5318 5319 // Could not find an Alloca. 5320 if (!LifetimeObject) 5321 continue; 5322 5323 // First check that the Alloca is static, otherwise it won't have a 5324 // valid frame index. 5325 auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject); 5326 if (SI == FuncInfo.StaticAllocaMap.end()) 5327 return nullptr; 5328 5329 int FI = SI->second; 5330 5331 SDValue Ops[2]; 5332 Ops[0] = getRoot(); 5333 Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); 5334 unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); 5335 5336 Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops); 5337 DAG.setRoot(Res); 5338 } 5339 return nullptr; 5340 } 5341 case Intrinsic::invariant_start: 5342 // Discard region information. 5343 setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); 5344 return nullptr; 5345 case Intrinsic::invariant_end: 5346 // Discard region information. 5347 return nullptr; 5348 case Intrinsic::stackprotectorcheck: { 5349 // Do not actually emit anything for this basic block. Instead we initialize 5350 // the stack protector descriptor and export the guard variable so we can 5351 // access it in FinishBasicBlock. 5352 const BasicBlock *BB = I.getParent(); 5353 SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); 5354 ExportFromCurrentBlock(SPDescriptor.getGuard()); 5355 5356 // Flush our exports since we are going to process a terminator. 5357 (void)getControlRoot(); 5358 return nullptr; 5359 } 5360 case Intrinsic::clear_cache: 5361 return TLI.getClearCacheBuiltinName(); 5362 case Intrinsic::eh_actions: 5363 setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); 5364 return nullptr; 5365 case Intrinsic::donothing: 5366 // ignore 5367 return nullptr; 5368 case Intrinsic::experimental_stackmap: { 5369 visitStackmap(I); 5370 return nullptr; 5371 } 5372 case Intrinsic::experimental_patchpoint_void: 5373 case Intrinsic::experimental_patchpoint_i64: { 5374 visitPatchpoint(&I); 5375 return nullptr; 5376 } 5377 case Intrinsic::experimental_gc_statepoint: { 5378 visitStatepoint(I); 5379 return nullptr; 5380 } 5381 case Intrinsic::experimental_gc_result_int: 5382 case Intrinsic::experimental_gc_result_float: 5383 case Intrinsic::experimental_gc_result_ptr: 5384 case Intrinsic::experimental_gc_result: { 5385 visitGCResult(I); 5386 return nullptr; 5387 } 5388 case Intrinsic::experimental_gc_relocate: { 5389 visitGCRelocate(I); 5390 return nullptr; 5391 } 5392 case Intrinsic::instrprof_increment: 5393 llvm_unreachable("instrprof failed to lower an increment"); 5394 5395 case Intrinsic::frameescape: { 5396 MachineFunction &MF = DAG.getMachineFunction(); 5397 const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo(); 5398 5399 // Directly emit some FRAME_ALLOC machine instrs. Label assignment emission 5400 // is the same on all targets. 5401 for (unsigned Idx = 0, E = I.getNumArgOperands(); Idx < E; ++Idx) { 5402 AllocaInst *Slot = 5403 cast<AllocaInst>(I.getArgOperand(Idx)->stripPointerCasts()); 5404 assert(FuncInfo.StaticAllocaMap.count(Slot) && 5405 "can only escape static allocas"); 5406 int FI = FuncInfo.StaticAllocaMap[Slot]; 5407 MCSymbol *FrameAllocSym = 5408 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(MF.getName(), 5409 Idx); 5410 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, dl, 5411 TII->get(TargetOpcode::FRAME_ALLOC)) 5412 .addSym(FrameAllocSym) 5413 .addFrameIndex(FI); 5414 } 5415 5416 return nullptr; 5417 } 5418 5419 case Intrinsic::framerecover: { 5420 // i8* @llvm.framerecover(i8* %fn, i8* %fp, i32 %idx) 5421 MachineFunction &MF = DAG.getMachineFunction(); 5422 MVT PtrVT = TLI.getPointerTy(0); 5423 5424 // Get the symbol that defines the frame offset. 5425 auto *Fn = cast<Function>(I.getArgOperand(0)->stripPointerCasts()); 5426 auto *Idx = cast<ConstantInt>(I.getArgOperand(2)); 5427 unsigned IdxVal = unsigned(Idx->getLimitedValue(INT_MAX)); 5428 MCSymbol *FrameAllocSym = 5429 MF.getMMI().getContext().getOrCreateFrameAllocSymbol(Fn->getName(), 5430 IdxVal); 5431 5432 // Create a TargetExternalSymbol for the label to avoid any target lowering 5433 // that would make this PC relative. 5434 StringRef Name = FrameAllocSym->getName(); 5435 assert(Name.data()[Name.size()] == '\0' && "not null terminated"); 5436 SDValue OffsetSym = DAG.getTargetExternalSymbol(Name.data(), PtrVT); 5437 SDValue OffsetVal = 5438 DAG.getNode(ISD::FRAME_ALLOC_RECOVER, sdl, PtrVT, OffsetSym); 5439 5440 // Add the offset to the FP. 5441 Value *FP = I.getArgOperand(1); 5442 SDValue FPVal = getValue(FP); 5443 SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal); 5444 setValue(&I, Add); 5445 5446 return nullptr; 5447 } 5448 case Intrinsic::eh_begincatch: 5449 case Intrinsic::eh_endcatch: 5450 llvm_unreachable("begin/end catch intrinsics not lowered in codegen"); 5451 case Intrinsic::eh_unwindhelp: { 5452 AllocaInst *Slot = 5453 cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts()); 5454 assert(FuncInfo.StaticAllocaMap.count(Slot) && 5455 "can only use static allocas with llvm.eh.unwindhelp"); 5456 int FI = FuncInfo.StaticAllocaMap[Slot]; 5457 MachineFunction &MF = DAG.getMachineFunction(); 5458 MachineModuleInfo &MMI = MF.getMMI(); 5459 MMI.getWinEHFuncInfo(MF.getFunction()).UnwindHelpFrameIdx = FI; 5460 return nullptr; 5461 } 5462 } 5463 } 5464 5465 std::pair<SDValue, SDValue> 5466 SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, 5467 MachineBasicBlock *LandingPad) { 5468 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 5469 MCSymbol *BeginLabel = nullptr; 5470 5471 if (LandingPad) { 5472 // Insert a label before the invoke call to mark the try range. This can be 5473 // used to detect deletion of the invoke via the MachineModuleInfo. 5474 BeginLabel = MMI.getContext().CreateTempSymbol(); 5475 5476 // For SjLj, keep track of which landing pads go with which invokes 5477 // so as to maintain the ordering of pads in the LSDA. 5478 unsigned CallSiteIndex = MMI.getCurrentCallSite(); 5479 if (CallSiteIndex) { 5480 MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex); 5481 LPadToCallSiteMap[LandingPad].push_back(CallSiteIndex); 5482 5483 // Now that the call site is handled, stop tracking it. 5484 MMI.setCurrentCallSite(0); 5485 } 5486 5487 // Both PendingLoads and PendingExports must be flushed here; 5488 // this call might not return. 5489 (void)getRoot(); 5490 DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); 5491 5492 CLI.setChain(getRoot()); 5493 } 5494 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 5495 std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); 5496 5497 assert((CLI.IsTailCall || Result.second.getNode()) && 5498 "Non-null chain expected with non-tail call!"); 5499 assert((Result.second.getNode() || !Result.first.getNode()) && 5500 "Null value expected with tail call!"); 5501 5502 if (!Result.second.getNode()) { 5503 // As a special case, a null chain means that a tail call has been emitted 5504 // and the DAG root is already updated. 5505 HasTailCall = true; 5506 5507 // Since there's no actual continuation from this block, nothing can be 5508 // relying on us setting vregs for them. 5509 PendingExports.clear(); 5510 } else { 5511 DAG.setRoot(Result.second); 5512 } 5513 5514 if (LandingPad) { 5515 // Insert a label at the end of the invoke call to mark the try range. This 5516 // can be used to detect deletion of the invoke via the MachineModuleInfo. 5517 MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); 5518 DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); 5519 5520 // Inform MachineModuleInfo of range. 5521 MMI.addInvoke(LandingPad, BeginLabel, EndLabel); 5522 } 5523 5524 return Result; 5525 } 5526 5527 void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, 5528 bool isTailCall, 5529 MachineBasicBlock *LandingPad) { 5530 PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 5531 FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 5532 Type *RetTy = FTy->getReturnType(); 5533 5534 TargetLowering::ArgListTy Args; 5535 TargetLowering::ArgListEntry Entry; 5536 Args.reserve(CS.arg_size()); 5537 5538 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 5539 i != e; ++i) { 5540 const Value *V = *i; 5541 5542 // Skip empty types 5543 if (V->getType()->isEmptyTy()) 5544 continue; 5545 5546 SDValue ArgNode = getValue(V); 5547 Entry.Node = ArgNode; Entry.Ty = V->getType(); 5548 5549 // Skip the first return-type Attribute to get to params. 5550 Entry.setAttributes(&CS, i - CS.arg_begin() + 1); 5551 Args.push_back(Entry); 5552 5553 // If we have an explicit sret argument that is an Instruction, (i.e., it 5554 // might point to function-local memory), we can't meaningfully tail-call. 5555 if (Entry.isSRet && isa<Instruction>(V)) 5556 isTailCall = false; 5557 } 5558 5559 // Check if target-independent constraints permit a tail call here. 5560 // Target-dependent constraints are checked within TLI->LowerCallTo. 5561 if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget())) 5562 isTailCall = false; 5563 5564 TargetLowering::CallLoweringInfo CLI(DAG); 5565 CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) 5566 .setCallee(RetTy, FTy, Callee, std::move(Args), CS) 5567 .setTailCall(isTailCall); 5568 std::pair<SDValue,SDValue> Result = lowerInvokable(CLI, LandingPad); 5569 5570 if (Result.first.getNode()) 5571 setValue(CS.getInstruction(), Result.first); 5572 } 5573 5574 /// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the 5575 /// value is equal or not-equal to zero. 5576 static bool IsOnlyUsedInZeroEqualityComparison(const Value *V) { 5577 for (const User *U : V->users()) { 5578 if (const ICmpInst *IC = dyn_cast<ICmpInst>(U)) 5579 if (IC->isEquality()) 5580 if (const Constant *C = dyn_cast<Constant>(IC->getOperand(1))) 5581 if (C->isNullValue()) 5582 continue; 5583 // Unknown instruction. 5584 return false; 5585 } 5586 return true; 5587 } 5588 5589 static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, 5590 Type *LoadTy, 5591 SelectionDAGBuilder &Builder) { 5592 5593 // Check to see if this load can be trivially constant folded, e.g. if the 5594 // input is from a string literal. 5595 if (const Constant *LoadInput = dyn_cast<Constant>(PtrVal)) { 5596 // Cast pointer to the type we really want to load. 5597 LoadInput = ConstantExpr::getBitCast(const_cast<Constant *>(LoadInput), 5598 PointerType::getUnqual(LoadTy)); 5599 5600 if (const Constant *LoadCst = ConstantFoldLoadFromConstPtr( 5601 const_cast<Constant *>(LoadInput), *Builder.DL)) 5602 return Builder.getValue(LoadCst); 5603 } 5604 5605 // Otherwise, we have to emit the load. If the pointer is to unfoldable but 5606 // still constant memory, the input chain can be the entry node. 5607 SDValue Root; 5608 bool ConstantMemory = false; 5609 5610 // Do not serialize (non-volatile) loads of constant memory with anything. 5611 if (Builder.AA->pointsToConstantMemory(PtrVal)) { 5612 Root = Builder.DAG.getEntryNode(); 5613 ConstantMemory = true; 5614 } else { 5615 // Do not serialize non-volatile loads against each other. 5616 Root = Builder.DAG.getRoot(); 5617 } 5618 5619 SDValue Ptr = Builder.getValue(PtrVal); 5620 SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, 5621 Ptr, MachinePointerInfo(PtrVal), 5622 false /*volatile*/, 5623 false /*nontemporal*/, 5624 false /*isinvariant*/, 1 /* align=1 */); 5625 5626 if (!ConstantMemory) 5627 Builder.PendingLoads.push_back(LoadVal.getValue(1)); 5628 return LoadVal; 5629 } 5630 5631 /// processIntegerCallValue - Record the value for an instruction that 5632 /// produces an integer result, converting the type where necessary. 5633 void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, 5634 SDValue Value, 5635 bool IsSigned) { 5636 EVT VT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); 5637 if (IsSigned) 5638 Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); 5639 else 5640 Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); 5641 setValue(&I, Value); 5642 } 5643 5644 /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. 5645 /// If so, return true and lower it, otherwise return false and it will be 5646 /// lowered like a normal call. 5647 bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { 5648 // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) 5649 if (I.getNumArgOperands() != 3) 5650 return false; 5651 5652 const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); 5653 if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || 5654 !I.getArgOperand(2)->getType()->isIntegerTy() || 5655 !I.getType()->isIntegerTy()) 5656 return false; 5657 5658 const Value *Size = I.getArgOperand(2); 5659 const ConstantInt *CSize = dyn_cast<ConstantInt>(Size); 5660 if (CSize && CSize->getZExtValue() == 0) { 5661 EVT CallVT = DAG.getTargetLoweringInfo().getValueType(I.getType(), true); 5662 setValue(&I, DAG.getConstant(0, CallVT)); 5663 return true; 5664 } 5665 5666 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5667 std::pair<SDValue, SDValue> Res = 5668 TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), 5669 getValue(LHS), getValue(RHS), getValue(Size), 5670 MachinePointerInfo(LHS), 5671 MachinePointerInfo(RHS)); 5672 if (Res.first.getNode()) { 5673 processIntegerCallValue(I, Res.first, true); 5674 PendingLoads.push_back(Res.second); 5675 return true; 5676 } 5677 5678 // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 5679 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 5680 if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { 5681 bool ActuallyDoIt = true; 5682 MVT LoadVT; 5683 Type *LoadTy; 5684 switch (CSize->getZExtValue()) { 5685 default: 5686 LoadVT = MVT::Other; 5687 LoadTy = nullptr; 5688 ActuallyDoIt = false; 5689 break; 5690 case 2: 5691 LoadVT = MVT::i16; 5692 LoadTy = Type::getInt16Ty(CSize->getContext()); 5693 break; 5694 case 4: 5695 LoadVT = MVT::i32; 5696 LoadTy = Type::getInt32Ty(CSize->getContext()); 5697 break; 5698 case 8: 5699 LoadVT = MVT::i64; 5700 LoadTy = Type::getInt64Ty(CSize->getContext()); 5701 break; 5702 /* 5703 case 16: 5704 LoadVT = MVT::v4i32; 5705 LoadTy = Type::getInt32Ty(CSize->getContext()); 5706 LoadTy = VectorType::get(LoadTy, 4); 5707 break; 5708 */ 5709 } 5710 5711 // This turns into unaligned loads. We only do this if the target natively 5712 // supports the MVT we'll be loading or if it is small enough (<= 4) that 5713 // we'll only produce a small number of byte loads. 5714 5715 // Require that we can find a legal MVT, and only do this if the target 5716 // supports unaligned loads of that type. Expanding into byte loads would 5717 // bloat the code. 5718 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 5719 if (ActuallyDoIt && CSize->getZExtValue() > 4) { 5720 unsigned DstAS = LHS->getType()->getPointerAddressSpace(); 5721 unsigned SrcAS = RHS->getType()->getPointerAddressSpace(); 5722 // TODO: Handle 5 byte compare as 4-byte + 1 byte. 5723 // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. 5724 // TODO: Check alignment of src and dest ptrs. 5725 if (!TLI.isTypeLegal(LoadVT) || 5726 !TLI.allowsMisalignedMemoryAccesses(LoadVT, SrcAS) || 5727 !TLI.allowsMisalignedMemoryAccesses(LoadVT, DstAS)) 5728 ActuallyDoIt = false; 5729 } 5730 5731 if (ActuallyDoIt) { 5732 SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); 5733 SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); 5734 5735 SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, 5736 ISD::SETNE); 5737 processIntegerCallValue(I, Res, false); 5738 return true; 5739 } 5740 } 5741 5742 5743 return false; 5744 } 5745 5746 /// visitMemChrCall -- See if we can lower a memchr call into an optimized 5747 /// form. If so, return true and lower it, otherwise return false and it 5748 /// will be lowered like a normal call. 5749 bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { 5750 // Verify that the prototype makes sense. void *memchr(void *, int, size_t) 5751 if (I.getNumArgOperands() != 3) 5752 return false; 5753 5754 const Value *Src = I.getArgOperand(0); 5755 const Value *Char = I.getArgOperand(1); 5756 const Value *Length = I.getArgOperand(2); 5757 if (!Src->getType()->isPointerTy() || 5758 !Char->getType()->isIntegerTy() || 5759 !Length->getType()->isIntegerTy() || 5760 !I.getType()->isPointerTy()) 5761 return false; 5762 5763 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5764 std::pair<SDValue, SDValue> Res = 5765 TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), 5766 getValue(Src), getValue(Char), getValue(Length), 5767 MachinePointerInfo(Src)); 5768 if (Res.first.getNode()) { 5769 setValue(&I, Res.first); 5770 PendingLoads.push_back(Res.second); 5771 return true; 5772 } 5773 5774 return false; 5775 } 5776 5777 /// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an 5778 /// optimized form. If so, return true and lower it, otherwise return false 5779 /// and it will be lowered like a normal call. 5780 bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { 5781 // Verify that the prototype makes sense. char *strcpy(char *, char *) 5782 if (I.getNumArgOperands() != 2) 5783 return false; 5784 5785 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); 5786 if (!Arg0->getType()->isPointerTy() || 5787 !Arg1->getType()->isPointerTy() || 5788 !I.getType()->isPointerTy()) 5789 return false; 5790 5791 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5792 std::pair<SDValue, SDValue> Res = 5793 TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), 5794 getValue(Arg0), getValue(Arg1), 5795 MachinePointerInfo(Arg0), 5796 MachinePointerInfo(Arg1), isStpcpy); 5797 if (Res.first.getNode()) { 5798 setValue(&I, Res.first); 5799 DAG.setRoot(Res.second); 5800 return true; 5801 } 5802 5803 return false; 5804 } 5805 5806 /// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. 5807 /// If so, return true and lower it, otherwise return false and it will be 5808 /// lowered like a normal call. 5809 bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { 5810 // Verify that the prototype makes sense. int strcmp(void*,void*) 5811 if (I.getNumArgOperands() != 2) 5812 return false; 5813 5814 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); 5815 if (!Arg0->getType()->isPointerTy() || 5816 !Arg1->getType()->isPointerTy() || 5817 !I.getType()->isIntegerTy()) 5818 return false; 5819 5820 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5821 std::pair<SDValue, SDValue> Res = 5822 TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), 5823 getValue(Arg0), getValue(Arg1), 5824 MachinePointerInfo(Arg0), 5825 MachinePointerInfo(Arg1)); 5826 if (Res.first.getNode()) { 5827 processIntegerCallValue(I, Res.first, true); 5828 PendingLoads.push_back(Res.second); 5829 return true; 5830 } 5831 5832 return false; 5833 } 5834 5835 /// visitStrLenCall -- See if we can lower a strlen call into an optimized 5836 /// form. If so, return true and lower it, otherwise return false and it 5837 /// will be lowered like a normal call. 5838 bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { 5839 // Verify that the prototype makes sense. size_t strlen(char *) 5840 if (I.getNumArgOperands() != 1) 5841 return false; 5842 5843 const Value *Arg0 = I.getArgOperand(0); 5844 if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) 5845 return false; 5846 5847 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5848 std::pair<SDValue, SDValue> Res = 5849 TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), 5850 getValue(Arg0), MachinePointerInfo(Arg0)); 5851 if (Res.first.getNode()) { 5852 processIntegerCallValue(I, Res.first, false); 5853 PendingLoads.push_back(Res.second); 5854 return true; 5855 } 5856 5857 return false; 5858 } 5859 5860 /// visitStrNLenCall -- See if we can lower a strnlen call into an optimized 5861 /// form. If so, return true and lower it, otherwise return false and it 5862 /// will be lowered like a normal call. 5863 bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { 5864 // Verify that the prototype makes sense. size_t strnlen(char *, size_t) 5865 if (I.getNumArgOperands() != 2) 5866 return false; 5867 5868 const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); 5869 if (!Arg0->getType()->isPointerTy() || 5870 !Arg1->getType()->isIntegerTy() || 5871 !I.getType()->isIntegerTy()) 5872 return false; 5873 5874 const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); 5875 std::pair<SDValue, SDValue> Res = 5876 TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), 5877 getValue(Arg0), getValue(Arg1), 5878 MachinePointerInfo(Arg0)); 5879 if (Res.first.getNode()) { 5880 processIntegerCallValue(I, Res.first, false); 5881 PendingLoads.push_back(Res.second); 5882 return true; 5883 } 5884 5885 return false; 5886 } 5887 5888 /// visitUnaryFloatCall - If a call instruction is a unary floating-point 5889 /// operation (as expected), translate it to an SDNode with the specified opcode 5890 /// and return true. 5891 bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, 5892 unsigned Opcode) { 5893 // Sanity check that it really is a unary floating-point call. 5894 if (I.getNumArgOperands() != 1 || 5895 !I.getArgOperand(0)->getType()->isFloatingPointTy() || 5896 I.getType() != I.getArgOperand(0)->getType() || 5897 !I.onlyReadsMemory()) 5898 return false; 5899 5900 SDValue Tmp = getValue(I.getArgOperand(0)); 5901 setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); 5902 return true; 5903 } 5904 5905 /// visitBinaryFloatCall - If a call instruction is a binary floating-point 5906 /// operation (as expected), translate it to an SDNode with the specified opcode 5907 /// and return true. 5908 bool SelectionDAGBuilder::visitBinaryFloatCall(const CallInst &I, 5909 unsigned Opcode) { 5910 // Sanity check that it really is a binary floating-point call. 5911 if (I.getNumArgOperands() != 2 || 5912 !I.getArgOperand(0)->getType()->isFloatingPointTy() || 5913 I.getType() != I.getArgOperand(0)->getType() || 5914 I.getType() != I.getArgOperand(1)->getType() || 5915 !I.onlyReadsMemory()) 5916 return false; 5917 5918 SDValue Tmp0 = getValue(I.getArgOperand(0)); 5919 SDValue Tmp1 = getValue(I.getArgOperand(1)); 5920 EVT VT = Tmp0.getValueType(); 5921 setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), VT, Tmp0, Tmp1)); 5922 return true; 5923 } 5924 5925 void SelectionDAGBuilder::visitCall(const CallInst &I) { 5926 // Handle inline assembly differently. 5927 if (isa<InlineAsm>(I.getCalledValue())) { 5928 visitInlineAsm(&I); 5929 return; 5930 } 5931 5932 MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); 5933 ComputeUsesVAFloatArgument(I, &MMI); 5934 5935 const char *RenameFn = nullptr; 5936 if (Function *F = I.getCalledFunction()) { 5937 if (F->isDeclaration()) { 5938 if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { 5939 if (unsigned IID = II->getIntrinsicID(F)) { 5940 RenameFn = visitIntrinsicCall(I, IID); 5941 if (!RenameFn) 5942 return; 5943 } 5944 } 5945 if (unsigned IID = F->getIntrinsicID()) { 5946 RenameFn = visitIntrinsicCall(I, IID); 5947 if (!RenameFn) 5948 return; 5949 } 5950 } 5951 5952 // Check for well-known libc/libm calls. If the function is internal, it 5953 // can't be a library call. 5954 LibFunc::Func Func; 5955 if (!F->hasLocalLinkage() && F->hasName() && 5956 LibInfo->getLibFunc(F->getName(), Func) && 5957 LibInfo->hasOptimizedCodeGen(Func)) { 5958 switch (Func) { 5959 default: break; 5960 case LibFunc::copysign: 5961 case LibFunc::copysignf: 5962 case LibFunc::copysignl: 5963 if (I.getNumArgOperands() == 2 && // Basic sanity checks. 5964 I.getArgOperand(0)->getType()->isFloatingPointTy() && 5965 I.getType() == I.getArgOperand(0)->getType() && 5966 I.getType() == I.getArgOperand(1)->getType() && 5967 I.onlyReadsMemory()) { 5968 SDValue LHS = getValue(I.getArgOperand(0)); 5969 SDValue RHS = getValue(I.getArgOperand(1)); 5970 setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), 5971 LHS.getValueType(), LHS, RHS)); 5972 return; 5973 } 5974 break; 5975 case LibFunc::fabs: 5976 case LibFunc::fabsf: 5977 case LibFunc::fabsl: 5978 if (visitUnaryFloatCall(I, ISD::FABS)) 5979 return; 5980 break; 5981 case LibFunc::fmin: 5982 case LibFunc::fminf: 5983 case LibFunc::fminl: 5984 if (visitBinaryFloatCall(I, ISD::FMINNUM)) 5985 return; 5986 break; 5987 case LibFunc::fmax: 5988 case LibFunc::fmaxf: 5989 case LibFunc::fmaxl: 5990 if (visitBinaryFloatCall(I, ISD::FMAXNUM)) 5991 return; 5992 break; 5993 case LibFunc::sin: 5994 case LibFunc::sinf: 5995 case LibFunc::sinl: 5996 if (visitUnaryFloatCall(I, ISD::FSIN)) 5997 return; 5998 break; 5999 case LibFunc::cos: 6000 case LibFunc::cosf: 6001 case LibFunc::cosl: 6002 if (visitUnaryFloatCall(I, ISD::FCOS)) 6003 return; 6004 break; 6005 case LibFunc::sqrt: 6006 case LibFunc::sqrtf: 6007 case LibFunc::sqrtl: 6008 case LibFunc::sqrt_finite: 6009 case LibFunc::sqrtf_finite: 6010 case LibFunc::sqrtl_finite: 6011 if (visitUnaryFloatCall(I, ISD::FSQRT)) 6012 return; 6013 break; 6014 case LibFunc::floor: 6015 case LibFunc::floorf: 6016 case LibFunc::floorl: 6017 if (visitUnaryFloatCall(I, ISD::FFLOOR)) 6018 return; 6019 break; 6020 case LibFunc::nearbyint: 6021 case LibFunc::nearbyintf: 6022 case LibFunc::nearbyintl: 6023 if (visitUnaryFloatCall(I, ISD::FNEARBYINT)) 6024 return; 6025 break; 6026 case LibFunc::ceil: 6027 case LibFunc::ceilf: 6028 case LibFunc::ceill: 6029 if (visitUnaryFloatCall(I, ISD::FCEIL)) 6030 return; 6031 break; 6032 case LibFunc::rint: 6033 case LibFunc::rintf: 6034 case LibFunc::rintl: 6035 if (visitUnaryFloatCall(I, ISD::FRINT)) 6036 return; 6037 break; 6038 case LibFunc::round: 6039 case LibFunc::roundf: 6040 case LibFunc::roundl: 6041 if (visitUnaryFloatCall(I, ISD::FROUND)) 6042 return; 6043 break; 6044 case LibFunc::trunc: 6045 case LibFunc::truncf: 6046 case LibFunc::truncl: 6047 if (visitUnaryFloatCall(I, ISD::FTRUNC)) 6048 return; 6049 break; 6050 case LibFunc::log2: 6051 case LibFunc::log2f: 6052 case LibFunc::log2l: 6053 if (visitUnaryFloatCall(I, ISD::FLOG2)) 6054 return; 6055 break; 6056 case LibFunc::exp2: 6057 case LibFunc::exp2f: 6058 case LibFunc::exp2l: 6059 if (visitUnaryFloatCall(I, ISD::FEXP2)) 6060 return; 6061 break; 6062 case LibFunc::memcmp: 6063 if (visitMemCmpCall(I)) 6064 return; 6065 break; 6066 case LibFunc::memchr: 6067 if (visitMemChrCall(I)) 6068 return; 6069 break; 6070 case LibFunc::strcpy: 6071 if (visitStrCpyCall(I, false)) 6072 return; 6073 break; 6074 case LibFunc::stpcpy: 6075 if (visitStrCpyCall(I, true)) 6076 return; 6077 break; 6078 case LibFunc::strcmp: 6079 if (visitStrCmpCall(I)) 6080 return; 6081 break; 6082 case LibFunc::strlen: 6083 if (visitStrLenCall(I)) 6084 return; 6085 break; 6086 case LibFunc::strnlen: 6087 if (visitStrNLenCall(I)) 6088 return; 6089 break; 6090 } 6091 } 6092 } 6093 6094 SDValue Callee; 6095 if (!RenameFn) 6096 Callee = getValue(I.getCalledValue()); 6097 else 6098 Callee = DAG.getExternalSymbol(RenameFn, 6099 DAG.getTargetLoweringInfo().getPointerTy()); 6100 6101 // Check if we can potentially perform a tail call. More detailed checking is 6102 // be done within LowerCallTo, after more information about the call is known. 6103 LowerCallTo(&I, Callee, I.isTailCall()); 6104 } 6105 6106 namespace { 6107 6108 /// AsmOperandInfo - This contains information for each constraint that we are 6109 /// lowering. 6110 class SDISelAsmOperandInfo : public TargetLowering::AsmOperandInfo { 6111 public: 6112 /// CallOperand - If this is the result output operand or a clobber 6113 /// this is null, otherwise it is the incoming operand to the CallInst. 6114 /// This gets modified as the asm is processed. 6115 SDValue CallOperand; 6116 6117 /// AssignedRegs - If this is a register or register class operand, this 6118 /// contains the set of register corresponding to the operand. 6119 RegsForValue AssignedRegs; 6120 6121 explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info) 6122 : TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) { 6123 } 6124 6125 /// getCallOperandValEVT - Return the EVT of the Value* that this operand 6126 /// corresponds to. If there is no Value* for this operand, it returns 6127 /// MVT::Other. 6128 EVT getCallOperandValEVT(LLVMContext &Context, 6129 const TargetLowering &TLI, 6130 const DataLayout *DL) const { 6131 if (!CallOperandVal) return MVT::Other; 6132 6133 if (isa<BasicBlock>(CallOperandVal)) 6134 return TLI.getPointerTy(); 6135 6136 llvm::Type *OpTy = CallOperandVal->getType(); 6137 6138 // FIXME: code duplicated from TargetLowering::ParseConstraints(). 6139 // If this is an indirect operand, the operand is a pointer to the 6140 // accessed type. 6141 if (isIndirect) { 6142 llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy); 6143 if (!PtrTy) 6144 report_fatal_error("Indirect operand for inline asm not a pointer!"); 6145 OpTy = PtrTy->getElementType(); 6146 } 6147 6148 // Look for vector wrapped in a struct. e.g. { <16 x i8> }. 6149 if (StructType *STy = dyn_cast<StructType>(OpTy)) 6150 if (STy->getNumElements() == 1) 6151 OpTy = STy->getElementType(0); 6152 6153 // If OpTy is not a single value, it may be a struct/union that we 6154 // can tile with integers. 6155 if (!OpTy->isSingleValueType() && OpTy->isSized()) { 6156 unsigned BitSize = DL->getTypeSizeInBits(OpTy); 6157 switch (BitSize) { 6158 default: break; 6159 case 1: 6160 case 8: 6161 case 16: 6162 case 32: 6163 case 64: 6164 case 128: 6165 OpTy = IntegerType::get(Context, BitSize); 6166 break; 6167 } 6168 } 6169 6170 return TLI.getValueType(OpTy, true); 6171 } 6172 }; 6173 6174 typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector; 6175 6176 } // end anonymous namespace 6177 6178 /// GetRegistersForValue - Assign registers (virtual or physical) for the 6179 /// specified operand. We prefer to assign virtual registers, to allow the 6180 /// register allocator to handle the assignment process. However, if the asm 6181 /// uses features that we can't model on machineinstrs, we have SDISel do the 6182 /// allocation. This produces generally horrible, but correct, code. 6183 /// 6184 /// OpInfo describes the operand. 6185 /// 6186 static void GetRegistersForValue(SelectionDAG &DAG, 6187 const TargetLowering &TLI, 6188 SDLoc DL, 6189 SDISelAsmOperandInfo &OpInfo) { 6190 LLVMContext &Context = *DAG.getContext(); 6191 6192 MachineFunction &MF = DAG.getMachineFunction(); 6193 SmallVector<unsigned, 4> Regs; 6194 6195 // If this is a constraint for a single physreg, or a constraint for a 6196 // register class, find it. 6197 std::pair<unsigned, const TargetRegisterClass *> PhysReg = 6198 TLI.getRegForInlineAsmConstraint(MF.getSubtarget().getRegisterInfo(), 6199 OpInfo.ConstraintCode, 6200 OpInfo.ConstraintVT); 6201 6202 unsigned NumRegs = 1; 6203 if (OpInfo.ConstraintVT != MVT::Other) { 6204 // If this is a FP input in an integer register (or visa versa) insert a bit 6205 // cast of the input value. More generally, handle any case where the input 6206 // value disagrees with the register class we plan to stick this in. 6207 if (OpInfo.Type == InlineAsm::isInput && 6208 PhysReg.second && !PhysReg.second->hasType(OpInfo.ConstraintVT)) { 6209 // Try to convert to the first EVT that the reg class contains. If the 6210 // types are identical size, use a bitcast to convert (e.g. two differing 6211 // vector types). 6212 MVT RegVT = *PhysReg.second->vt_begin(); 6213 if (RegVT.getSizeInBits() == OpInfo.CallOperand.getValueSizeInBits()) { 6214 OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, 6215 RegVT, OpInfo.CallOperand); 6216 OpInfo.ConstraintVT = RegVT; 6217 } else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) { 6218 // If the input is a FP value and we want it in FP registers, do a 6219 // bitcast to the corresponding integer type. This turns an f64 value 6220 // into i64, which can be passed with two i32 values on a 32-bit 6221 // machine. 6222 RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); 6223 OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, 6224 RegVT, OpInfo.CallOperand); 6225 OpInfo.ConstraintVT = RegVT; 6226 } 6227 } 6228 6229 NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); 6230 } 6231 6232 MVT RegVT; 6233 EVT ValueVT = OpInfo.ConstraintVT; 6234 6235 // If this is a constraint for a specific physical register, like {r17}, 6236 // assign it now. 6237 if (unsigned AssignedReg = PhysReg.first) { 6238 const TargetRegisterClass *RC = PhysReg.second; 6239 if (OpInfo.ConstraintVT == MVT::Other) 6240 ValueVT = *RC->vt_begin(); 6241 6242 // Get the actual register value type. This is important, because the user 6243 // may have asked for (e.g.) the AX register in i32 type. We need to 6244 // remember that AX is actually i16 to get the right extension. 6245 RegVT = *RC->vt_begin(); 6246 6247 // This is a explicit reference to a physical register. 6248 Regs.push_back(AssignedReg); 6249 6250 // If this is an expanded reference, add the rest of the regs to Regs. 6251 if (NumRegs != 1) { 6252 TargetRegisterClass::iterator I = RC->begin(); 6253 for (; *I != AssignedReg; ++I) 6254 assert(I != RC->end() && "Didn't find reg!"); 6255 6256 // Already added the first reg. 6257 --NumRegs; ++I; 6258 for (; NumRegs; --NumRegs, ++I) { 6259 assert(I != RC->end() && "Ran out of registers to allocate!"); 6260 Regs.push_back(*I); 6261 } 6262 } 6263 6264 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); 6265 return; 6266 } 6267 6268 // Otherwise, if this was a reference to an LLVM register class, create vregs 6269 // for this reference. 6270 if (const TargetRegisterClass *RC = PhysReg.second) { 6271 RegVT = *RC->vt_begin(); 6272 if (OpInfo.ConstraintVT == MVT::Other) 6273 ValueVT = RegVT; 6274 6275 // Create the appropriate number of virtual registers. 6276 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 6277 for (; NumRegs; --NumRegs) 6278 Regs.push_back(RegInfo.createVirtualRegister(RC)); 6279 6280 OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); 6281 return; 6282 } 6283 6284 // Otherwise, we couldn't allocate enough registers for this. 6285 } 6286 6287 /// visitInlineAsm - Handle a call to an InlineAsm object. 6288 /// 6289 void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { 6290 const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue()); 6291 6292 /// ConstraintOperands - Information about all of the constraints. 6293 SDISelAsmOperandInfoVector ConstraintOperands; 6294 6295 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6296 TargetLowering::AsmOperandInfoVector TargetConstraints = 6297 TLI.ParseConstraints(DAG.getSubtarget().getRegisterInfo(), CS); 6298 6299 bool hasMemory = false; 6300 6301 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst. 6302 unsigned ResNo = 0; // ResNo - The result number of the next output. 6303 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 6304 ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); 6305 SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); 6306 6307 MVT OpVT = MVT::Other; 6308 6309 // Compute the value type for each operand. 6310 switch (OpInfo.Type) { 6311 case InlineAsm::isOutput: 6312 // Indirect outputs just consume an argument. 6313 if (OpInfo.isIndirect) { 6314 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 6315 break; 6316 } 6317 6318 // The return value of the call is this value. As such, there is no 6319 // corresponding argument. 6320 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 6321 if (StructType *STy = dyn_cast<StructType>(CS.getType())) { 6322 OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); 6323 } else { 6324 assert(ResNo == 0 && "Asm only has one result!"); 6325 OpVT = TLI.getSimpleValueType(CS.getType()); 6326 } 6327 ++ResNo; 6328 break; 6329 case InlineAsm::isInput: 6330 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++)); 6331 break; 6332 case InlineAsm::isClobber: 6333 // Nothing to do. 6334 break; 6335 } 6336 6337 // If this is an input or an indirect output, process the call argument. 6338 // BasicBlocks are labels, currently appearing only in asm's. 6339 if (OpInfo.CallOperandVal) { 6340 if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) { 6341 OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]); 6342 } else { 6343 OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); 6344 } 6345 6346 OpVT = 6347 OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, DL).getSimpleVT(); 6348 } 6349 6350 OpInfo.ConstraintVT = OpVT; 6351 6352 // Indirect operand accesses access memory. 6353 if (OpInfo.isIndirect) 6354 hasMemory = true; 6355 else { 6356 for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { 6357 TargetLowering::ConstraintType 6358 CType = TLI.getConstraintType(OpInfo.Codes[j]); 6359 if (CType == TargetLowering::C_Memory) { 6360 hasMemory = true; 6361 break; 6362 } 6363 } 6364 } 6365 } 6366 6367 SDValue Chain, Flag; 6368 6369 // We won't need to flush pending loads if this asm doesn't touch 6370 // memory and is nonvolatile. 6371 if (hasMemory || IA->hasSideEffects()) 6372 Chain = getRoot(); 6373 else 6374 Chain = DAG.getRoot(); 6375 6376 // Second pass over the constraints: compute which constraint option to use 6377 // and assign registers to constraints that want a specific physreg. 6378 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 6379 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 6380 6381 // If this is an output operand with a matching input operand, look up the 6382 // matching input. If their types mismatch, e.g. one is an integer, the 6383 // other is floating point, or their sizes are different, flag it as an 6384 // error. 6385 if (OpInfo.hasMatchingInput()) { 6386 SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput]; 6387 6388 if (OpInfo.ConstraintVT != Input.ConstraintVT) { 6389 const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo(); 6390 std::pair<unsigned, const TargetRegisterClass *> MatchRC = 6391 TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode, 6392 OpInfo.ConstraintVT); 6393 std::pair<unsigned, const TargetRegisterClass *> InputRC = 6394 TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode, 6395 Input.ConstraintVT); 6396 if ((OpInfo.ConstraintVT.isInteger() != 6397 Input.ConstraintVT.isInteger()) || 6398 (MatchRC.second != InputRC.second)) { 6399 report_fatal_error("Unsupported asm: input constraint" 6400 " with a matching output constraint of" 6401 " incompatible type!"); 6402 } 6403 Input.ConstraintVT = OpInfo.ConstraintVT; 6404 } 6405 } 6406 6407 // Compute the constraint code and ConstraintType to use. 6408 TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); 6409 6410 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 6411 OpInfo.Type == InlineAsm::isClobber) 6412 continue; 6413 6414 // If this is a memory input, and if the operand is not indirect, do what we 6415 // need to to provide an address for the memory input. 6416 if (OpInfo.ConstraintType == TargetLowering::C_Memory && 6417 !OpInfo.isIndirect) { 6418 assert((OpInfo.isMultipleAlternative || 6419 (OpInfo.Type == InlineAsm::isInput)) && 6420 "Can only indirectify direct input operands!"); 6421 6422 // Memory operands really want the address of the value. If we don't have 6423 // an indirect input, put it in the constpool if we can, otherwise spill 6424 // it to a stack slot. 6425 // TODO: This isn't quite right. We need to handle these according to 6426 // the addressing mode that the constraint wants. Also, this may take 6427 // an additional register for the computation and we don't want that 6428 // either. 6429 6430 // If the operand is a float, integer, or vector constant, spill to a 6431 // constant pool entry to get its address. 6432 const Value *OpVal = OpInfo.CallOperandVal; 6433 if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) || 6434 isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) { 6435 OpInfo.CallOperand = DAG.getConstantPool(cast<Constant>(OpVal), 6436 TLI.getPointerTy()); 6437 } else { 6438 // Otherwise, create a stack slot and emit a store to it before the 6439 // asm. 6440 Type *Ty = OpVal->getType(); 6441 uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); 6442 unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); 6443 MachineFunction &MF = DAG.getMachineFunction(); 6444 int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); 6445 SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); 6446 Chain = DAG.getStore(Chain, getCurSDLoc(), 6447 OpInfo.CallOperand, StackSlot, 6448 MachinePointerInfo::getFixedStack(SSFI), 6449 false, false, 0); 6450 OpInfo.CallOperand = StackSlot; 6451 } 6452 6453 // There is no longer a Value* corresponding to this operand. 6454 OpInfo.CallOperandVal = nullptr; 6455 6456 // It is now an indirect operand. 6457 OpInfo.isIndirect = true; 6458 } 6459 6460 // If this constraint is for a specific register, allocate it before 6461 // anything else. 6462 if (OpInfo.ConstraintType == TargetLowering::C_Register) 6463 GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); 6464 } 6465 6466 // Second pass - Loop over all of the operands, assigning virtual or physregs 6467 // to register class operands. 6468 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 6469 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 6470 6471 // C_Register operands have already been allocated, Other/Memory don't need 6472 // to be. 6473 if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) 6474 GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo); 6475 } 6476 6477 // AsmNodeOperands - The operands for the ISD::INLINEASM node. 6478 std::vector<SDValue> AsmNodeOperands; 6479 AsmNodeOperands.push_back(SDValue()); // reserve space for input chain 6480 AsmNodeOperands.push_back( 6481 DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), 6482 TLI.getPointerTy())); 6483 6484 // If we have a !srcloc metadata node associated with it, we want to attach 6485 // this to the ultimately generated inline asm machineinstr. To do this, we 6486 // pass in the third operand as this (potentially null) inline asm MDNode. 6487 const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); 6488 AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); 6489 6490 // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore 6491 // bits as operand 3. 6492 unsigned ExtraInfo = 0; 6493 if (IA->hasSideEffects()) 6494 ExtraInfo |= InlineAsm::Extra_HasSideEffects; 6495 if (IA->isAlignStack()) 6496 ExtraInfo |= InlineAsm::Extra_IsAlignStack; 6497 // Set the asm dialect. 6498 ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect; 6499 6500 // Determine if this InlineAsm MayLoad or MayStore based on the constraints. 6501 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { 6502 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; 6503 6504 // Compute the constraint code and ConstraintType to use. 6505 TLI.ComputeConstraintToUse(OpInfo, SDValue()); 6506 6507 // Ideally, we would only check against memory constraints. However, the 6508 // meaning of an other constraint can be target-specific and we can't easily 6509 // reason about it. Therefore, be conservative and set MayLoad/MayStore 6510 // for other constriants as well. 6511 if (OpInfo.ConstraintType == TargetLowering::C_Memory || 6512 OpInfo.ConstraintType == TargetLowering::C_Other) { 6513 if (OpInfo.Type == InlineAsm::isInput) 6514 ExtraInfo |= InlineAsm::Extra_MayLoad; 6515 else if (OpInfo.Type == InlineAsm::isOutput) 6516 ExtraInfo |= InlineAsm::Extra_MayStore; 6517 else if (OpInfo.Type == InlineAsm::isClobber) 6518 ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); 6519 } 6520 } 6521 6522 AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, 6523 TLI.getPointerTy())); 6524 6525 // Loop over all of the inputs, copying the operand values into the 6526 // appropriate registers and processing the output regs. 6527 RegsForValue RetValRegs; 6528 6529 // IndirectStoresToEmit - The set of stores to emit after the inline asm node. 6530 std::vector<std::pair<RegsForValue, Value*> > IndirectStoresToEmit; 6531 6532 for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) { 6533 SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i]; 6534 6535 switch (OpInfo.Type) { 6536 case InlineAsm::isOutput: { 6537 if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass && 6538 OpInfo.ConstraintType != TargetLowering::C_Register) { 6539 // Memory output, or 'other' output (e.g. 'X' constraint). 6540 assert(OpInfo.isIndirect && "Memory output must be indirect operand"); 6541 6542 unsigned ConstraintID = 6543 TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); 6544 assert(ConstraintID != InlineAsm::Constraint_Unknown && 6545 "Failed to convert memory constraint code to constraint id."); 6546 6547 // Add information to the INLINEASM node to know about this output. 6548 unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 6549 OpFlags = InlineAsm::getFlagWordForMem(OpFlags, ConstraintID); 6550 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, MVT::i32)); 6551 AsmNodeOperands.push_back(OpInfo.CallOperand); 6552 break; 6553 } 6554 6555 // Otherwise, this is a register or register class output. 6556 6557 // Copy the output from the appropriate register. Find a register that 6558 // we can use. 6559 if (OpInfo.AssignedRegs.Regs.empty()) { 6560 LLVMContext &Ctx = *DAG.getContext(); 6561 Ctx.emitError(CS.getInstruction(), 6562 "couldn't allocate output register for constraint '" + 6563 Twine(OpInfo.ConstraintCode) + "'"); 6564 return; 6565 } 6566 6567 // If this is an indirect operand, store through the pointer after the 6568 // asm. 6569 if (OpInfo.isIndirect) { 6570 IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs, 6571 OpInfo.CallOperandVal)); 6572 } else { 6573 // This is the result value of the call. 6574 assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); 6575 // Concatenate this output onto the outputs list. 6576 RetValRegs.append(OpInfo.AssignedRegs); 6577 } 6578 6579 // Add information to the INLINEASM node to know that this register is 6580 // set. 6581 OpInfo.AssignedRegs 6582 .AddInlineAsmOperands(OpInfo.isEarlyClobber 6583 ? InlineAsm::Kind_RegDefEarlyClobber 6584 : InlineAsm::Kind_RegDef, 6585 false, 0, DAG, AsmNodeOperands); 6586 break; 6587 } 6588 case InlineAsm::isInput: { 6589 SDValue InOperandVal = OpInfo.CallOperand; 6590 6591 if (OpInfo.isMatchingInputConstraint()) { // Matching constraint? 6592 // If this is required to match an output register we have already set, 6593 // just use its register. 6594 unsigned OperandNo = OpInfo.getMatchedOperand(); 6595 6596 // Scan until we find the definition we already emitted of this operand. 6597 // When we find it, create a RegsForValue operand. 6598 unsigned CurOp = InlineAsm::Op_FirstOperand; 6599 for (; OperandNo; --OperandNo) { 6600 // Advance to the next operand. 6601 unsigned OpFlag = 6602 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); 6603 assert((InlineAsm::isRegDefKind(OpFlag) || 6604 InlineAsm::isRegDefEarlyClobberKind(OpFlag) || 6605 InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?"); 6606 CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1; 6607 } 6608 6609 unsigned OpFlag = 6610 cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue(); 6611 if (InlineAsm::isRegDefKind(OpFlag) || 6612 InlineAsm::isRegDefEarlyClobberKind(OpFlag)) { 6613 // Add (OpFlag&0xffff)>>3 registers to MatchedRegs. 6614 if (OpInfo.isIndirect) { 6615 // This happens on gcc/testsuite/gcc.dg/pr8788-1.c 6616 LLVMContext &Ctx = *DAG.getContext(); 6617 Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" 6618 " don't know how to handle tied " 6619 "indirect register inputs"); 6620 return; 6621 } 6622 6623 RegsForValue MatchedRegs; 6624 MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); 6625 MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); 6626 MatchedRegs.RegVTs.push_back(RegVT); 6627 MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); 6628 for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); 6629 i != e; ++i) { 6630 if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) 6631 MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); 6632 else { 6633 LLVMContext &Ctx = *DAG.getContext(); 6634 Ctx.emitError(CS.getInstruction(), 6635 "inline asm error: This value" 6636 " type register class is not natively supported!"); 6637 return; 6638 } 6639 } 6640 // Use the produced MatchedRegs object to 6641 MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), 6642 Chain, &Flag, CS.getInstruction()); 6643 MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, 6644 true, OpInfo.getMatchedOperand(), 6645 DAG, AsmNodeOperands); 6646 break; 6647 } 6648 6649 assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!"); 6650 assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 && 6651 "Unexpected number of operands"); 6652 // Add information to the INLINEASM node to know about this input. 6653 // See InlineAsm.h isUseOperandTiedToDef. 6654 OpFlag = InlineAsm::convertMemFlagWordToMatchingFlagWord(OpFlag); 6655 OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, 6656 OpInfo.getMatchedOperand()); 6657 AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, 6658 TLI.getPointerTy())); 6659 AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); 6660 break; 6661 } 6662 6663 // Treat indirect 'X' constraint as memory. 6664 if (OpInfo.ConstraintType == TargetLowering::C_Other && 6665 OpInfo.isIndirect) 6666 OpInfo.ConstraintType = TargetLowering::C_Memory; 6667 6668 if (OpInfo.ConstraintType == TargetLowering::C_Other) { 6669 std::vector<SDValue> Ops; 6670 TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, 6671 Ops, DAG); 6672 if (Ops.empty()) { 6673 LLVMContext &Ctx = *DAG.getContext(); 6674 Ctx.emitError(CS.getInstruction(), 6675 "invalid operand for inline asm constraint '" + 6676 Twine(OpInfo.ConstraintCode) + "'"); 6677 return; 6678 } 6679 6680 // Add information to the INLINEASM node to know about this input. 6681 unsigned ResOpType = 6682 InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); 6683 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, 6684 TLI.getPointerTy())); 6685 AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); 6686 break; 6687 } 6688 6689 if (OpInfo.ConstraintType == TargetLowering::C_Memory) { 6690 assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); 6691 assert(InOperandVal.getValueType() == TLI.getPointerTy() && 6692 "Memory operands expect pointer values"); 6693 6694 unsigned ConstraintID = 6695 TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode); 6696 assert(ConstraintID != InlineAsm::Constraint_Unknown && 6697 "Failed to convert memory constraint code to constraint id."); 6698 6699 // Add information to the INLINEASM node to know about this input. 6700 unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); 6701 ResOpType = InlineAsm::getFlagWordForMem(ResOpType, ConstraintID); 6702 AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, MVT::i32)); 6703 AsmNodeOperands.push_back(InOperandVal); 6704 break; 6705 } 6706 6707 assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass || 6708 OpInfo.ConstraintType == TargetLowering::C_Register) && 6709 "Unknown constraint type!"); 6710 6711 // TODO: Support this. 6712 if (OpInfo.isIndirect) { 6713 LLVMContext &Ctx = *DAG.getContext(); 6714 Ctx.emitError(CS.getInstruction(), 6715 "Don't know how to handle indirect register inputs yet " 6716 "for constraint '" + 6717 Twine(OpInfo.ConstraintCode) + "'"); 6718 return; 6719 } 6720 6721 // Copy the input into the appropriate registers. 6722 if (OpInfo.AssignedRegs.Regs.empty()) { 6723 LLVMContext &Ctx = *DAG.getContext(); 6724 Ctx.emitError(CS.getInstruction(), 6725 "couldn't allocate input reg for constraint '" + 6726 Twine(OpInfo.ConstraintCode) + "'"); 6727 return; 6728 } 6729 6730 OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), 6731 Chain, &Flag, CS.getInstruction()); 6732 6733 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, 6734 DAG, AsmNodeOperands); 6735 break; 6736 } 6737 case InlineAsm::isClobber: { 6738 // Add the clobbered value to the operand list, so that the register 6739 // allocator is aware that the physreg got clobbered. 6740 if (!OpInfo.AssignedRegs.Regs.empty()) 6741 OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber, 6742 false, 0, DAG, 6743 AsmNodeOperands); 6744 break; 6745 } 6746 } 6747 } 6748 6749 // Finish up input operands. Set the input chain and add the flag last. 6750 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 6751 if (Flag.getNode()) AsmNodeOperands.push_back(Flag); 6752 6753 Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), 6754 DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 6755 Flag = Chain.getValue(1); 6756 6757 // If this asm returns a register value, copy the result from that register 6758 // and set it as the value of the call. 6759 if (!RetValRegs.Regs.empty()) { 6760 SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), 6761 Chain, &Flag, CS.getInstruction()); 6762 6763 // FIXME: Why don't we do this for inline asms with MRVs? 6764 if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { 6765 EVT ResultType = TLI.getValueType(CS.getType()); 6766 6767 // If any of the results of the inline asm is a vector, it may have the 6768 // wrong width/num elts. This can happen for register classes that can 6769 // contain multiple different value types. The preg or vreg allocated may 6770 // not have the same VT as was expected. Convert it to the right type 6771 // with bit_convert. 6772 if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { 6773 Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), 6774 ResultType, Val); 6775 6776 } else if (ResultType != Val.getValueType() && 6777 ResultType.isInteger() && Val.getValueType().isInteger()) { 6778 // If a result value was tied to an input value, the computed result may 6779 // have a wider width than the expected result. Extract the relevant 6780 // portion. 6781 Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); 6782 } 6783 6784 assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); 6785 } 6786 6787 setValue(CS.getInstruction(), Val); 6788 // Don't need to use this as a chain in this case. 6789 if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty()) 6790 return; 6791 } 6792 6793 std::vector<std::pair<SDValue, const Value *> > StoresToEmit; 6794 6795 // Process indirect outputs, first output all of the flagged copies out of 6796 // physregs. 6797 for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { 6798 RegsForValue &OutRegs = IndirectStoresToEmit[i].first; 6799 const Value *Ptr = IndirectStoresToEmit[i].second; 6800 SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), 6801 Chain, &Flag, IA); 6802 StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); 6803 } 6804 6805 // Emit the non-flagged stores from the physregs. 6806 SmallVector<SDValue, 8> OutChains; 6807 for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { 6808 SDValue Val = DAG.getStore(Chain, getCurSDLoc(), 6809 StoresToEmit[i].first, 6810 getValue(StoresToEmit[i].second), 6811 MachinePointerInfo(StoresToEmit[i].second), 6812 false, false, 0); 6813 OutChains.push_back(Val); 6814 } 6815 6816 if (!OutChains.empty()) 6817 Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains); 6818 6819 DAG.setRoot(Chain); 6820 } 6821 6822 void SelectionDAGBuilder::visitVAStart(const CallInst &I) { 6823 DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), 6824 MVT::Other, getRoot(), 6825 getValue(I.getArgOperand(0)), 6826 DAG.getSrcValue(I.getArgOperand(0)))); 6827 } 6828 6829 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { 6830 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6831 const DataLayout &DL = *TLI.getDataLayout(); 6832 SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurSDLoc(), 6833 getRoot(), getValue(I.getOperand(0)), 6834 DAG.getSrcValue(I.getOperand(0)), 6835 DL.getABITypeAlignment(I.getType())); 6836 setValue(&I, V); 6837 DAG.setRoot(V.getValue(1)); 6838 } 6839 6840 void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { 6841 DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), 6842 MVT::Other, getRoot(), 6843 getValue(I.getArgOperand(0)), 6844 DAG.getSrcValue(I.getArgOperand(0)))); 6845 } 6846 6847 void SelectionDAGBuilder::visitVACopy(const CallInst &I) { 6848 DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), 6849 MVT::Other, getRoot(), 6850 getValue(I.getArgOperand(0)), 6851 getValue(I.getArgOperand(1)), 6852 DAG.getSrcValue(I.getArgOperand(0)), 6853 DAG.getSrcValue(I.getArgOperand(1)))); 6854 } 6855 6856 /// \brief Lower an argument list according to the target calling convention. 6857 /// 6858 /// \return A tuple of <return-value, token-chain> 6859 /// 6860 /// This is a helper for lowering intrinsics that follow a target calling 6861 /// convention or require stack pointer adjustment. Only a subset of the 6862 /// intrinsic's operands need to participate in the calling convention. 6863 std::pair<SDValue, SDValue> 6864 SelectionDAGBuilder::lowerCallOperands(ImmutableCallSite CS, unsigned ArgIdx, 6865 unsigned NumArgs, SDValue Callee, 6866 bool UseVoidTy, 6867 MachineBasicBlock *LandingPad, 6868 bool IsPatchPoint) { 6869 TargetLowering::ArgListTy Args; 6870 Args.reserve(NumArgs); 6871 6872 // Populate the argument list. 6873 // Attributes for args start at offset 1, after the return attribute. 6874 for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; 6875 ArgI != ArgE; ++ArgI) { 6876 const Value *V = CS->getOperand(ArgI); 6877 6878 assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); 6879 6880 TargetLowering::ArgListEntry Entry; 6881 Entry.Node = getValue(V); 6882 Entry.Ty = V->getType(); 6883 Entry.setAttributes(&CS, AttrI); 6884 Args.push_back(Entry); 6885 } 6886 6887 Type *retTy = UseVoidTy ? Type::getVoidTy(*DAG.getContext()) : CS->getType(); 6888 TargetLowering::CallLoweringInfo CLI(DAG); 6889 CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) 6890 .setCallee(CS.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) 6891 .setDiscardResult(CS->use_empty()).setIsPatchPoint(IsPatchPoint); 6892 6893 return lowerInvokable(CLI, LandingPad); 6894 } 6895 6896 /// \brief Add a stack map intrinsic call's live variable operands to a stackmap 6897 /// or patchpoint target node's operand list. 6898 /// 6899 /// Constants are converted to TargetConstants purely as an optimization to 6900 /// avoid constant materialization and register allocation. 6901 /// 6902 /// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not 6903 /// generate addess computation nodes, and so ExpandISelPseudo can convert the 6904 /// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids 6905 /// address materialization and register allocation, but may also be required 6906 /// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an 6907 /// alloca in the entry block, then the runtime may assume that the alloca's 6908 /// StackMap location can be read immediately after compilation and that the 6909 /// location is valid at any point during execution (this is similar to the 6910 /// assumption made by the llvm.gcroot intrinsic). If the alloca's location were 6911 /// only available in a register, then the runtime would need to trap when 6912 /// execution reaches the StackMap in order to read the alloca's location. 6913 static void addStackMapLiveVars(ImmutableCallSite CS, unsigned StartIdx, 6914 SmallVectorImpl<SDValue> &Ops, 6915 SelectionDAGBuilder &Builder) { 6916 for (unsigned i = StartIdx, e = CS.arg_size(); i != e; ++i) { 6917 SDValue OpVal = Builder.getValue(CS.getArgument(i)); 6918 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(OpVal)) { 6919 Ops.push_back( 6920 Builder.DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); 6921 Ops.push_back( 6922 Builder.DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); 6923 } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(OpVal)) { 6924 const TargetLowering &TLI = Builder.DAG.getTargetLoweringInfo(); 6925 Ops.push_back( 6926 Builder.DAG.getTargetFrameIndex(FI->getIndex(), TLI.getPointerTy())); 6927 } else 6928 Ops.push_back(OpVal); 6929 } 6930 } 6931 6932 /// \brief Lower llvm.experimental.stackmap directly to its target opcode. 6933 void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { 6934 // void @llvm.experimental.stackmap(i32 <id>, i32 <numShadowBytes>, 6935 // [live variables...]) 6936 6937 assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); 6938 6939 SDValue Chain, InFlag, Callee, NullPtr; 6940 SmallVector<SDValue, 32> Ops; 6941 6942 SDLoc DL = getCurSDLoc(); 6943 Callee = getValue(CI.getCalledValue()); 6944 NullPtr = DAG.getIntPtrConstant(0, true); 6945 6946 // The stackmap intrinsic only records the live variables (the arguemnts 6947 // passed to it) and emits NOPS (if requested). Unlike the patchpoint 6948 // intrinsic, this won't be lowered to a function call. This means we don't 6949 // have to worry about calling conventions and target specific lowering code. 6950 // Instead we perform the call lowering right here. 6951 // 6952 // chain, flag = CALLSEQ_START(chain, 0) 6953 // chain, flag = STACKMAP(id, nbytes, ..., chain, flag) 6954 // chain, flag = CALLSEQ_END(chain, 0, 0, flag) 6955 // 6956 Chain = DAG.getCALLSEQ_START(getRoot(), NullPtr, DL); 6957 InFlag = Chain.getValue(1); 6958 6959 // Add the <id> and <numBytes> constants. 6960 SDValue IDVal = getValue(CI.getOperand(PatchPointOpers::IDPos)); 6961 Ops.push_back(DAG.getTargetConstant( 6962 cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); 6963 SDValue NBytesVal = getValue(CI.getOperand(PatchPointOpers::NBytesPos)); 6964 Ops.push_back(DAG.getTargetConstant( 6965 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); 6966 6967 // Push live variables for the stack map. 6968 addStackMapLiveVars(&CI, 2, Ops, *this); 6969 6970 // We are not pushing any register mask info here on the operands list, 6971 // because the stackmap doesn't clobber anything. 6972 6973 // Push the chain and the glue flag. 6974 Ops.push_back(Chain); 6975 Ops.push_back(InFlag); 6976 6977 // Create the STACKMAP node. 6978 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 6979 SDNode *SM = DAG.getMachineNode(TargetOpcode::STACKMAP, DL, NodeTys, Ops); 6980 Chain = SDValue(SM, 0); 6981 InFlag = Chain.getValue(1); 6982 6983 Chain = DAG.getCALLSEQ_END(Chain, NullPtr, NullPtr, InFlag, DL); 6984 6985 // Stackmaps don't generate values, so nothing goes into the NodeMap. 6986 6987 // Set the root to the target-lowered call chain. 6988 DAG.setRoot(Chain); 6989 6990 // Inform the Frame Information that we have a stackmap in this function. 6991 FuncInfo.MF->getFrameInfo()->setHasStackMap(); 6992 } 6993 6994 /// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 6995 void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS, 6996 MachineBasicBlock *LandingPad) { 6997 // void|i64 @llvm.experimental.patchpoint.void|i64(i64 <id>, 6998 // i32 <numBytes>, 6999 // i8* <target>, 7000 // i32 <numArgs>, 7001 // [Args...], 7002 // [live variables...]) 7003 7004 CallingConv::ID CC = CS.getCallingConv(); 7005 bool IsAnyRegCC = CC == CallingConv::AnyReg; 7006 bool HasDef = !CS->getType()->isVoidTy(); 7007 SDValue Callee = getValue(CS->getOperand(2)); // <target> 7008 7009 // Get the real number of arguments participating in the call <numArgs> 7010 SDValue NArgVal = getValue(CS.getArgument(PatchPointOpers::NArgPos)); 7011 unsigned NumArgs = cast<ConstantSDNode>(NArgVal)->getZExtValue(); 7012 7013 // Skip the four meta args: <id>, <numNopBytes>, <target>, <numArgs> 7014 // Intrinsics include all meta-operands up to but not including CC. 7015 unsigned NumMetaOpers = PatchPointOpers::CCPos; 7016 assert(CS.arg_size() >= NumMetaOpers + NumArgs && 7017 "Not enough arguments provided to the patchpoint intrinsic"); 7018 7019 // For AnyRegCC the arguments are lowered later on manually. 7020 unsigned NumCallArgs = IsAnyRegCC ? 0 : NumArgs; 7021 std::pair<SDValue, SDValue> Result = 7022 lowerCallOperands(CS, NumMetaOpers, NumCallArgs, Callee, IsAnyRegCC, 7023 LandingPad, true); 7024 7025 SDNode *CallEnd = Result.second.getNode(); 7026 if (HasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) 7027 CallEnd = CallEnd->getOperand(0).getNode(); 7028 7029 /// Get a call instruction from the call sequence chain. 7030 /// Tail calls are not allowed. 7031 assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && 7032 "Expected a callseq node."); 7033 SDNode *Call = CallEnd->getOperand(0).getNode(); 7034 bool HasGlue = Call->getGluedNode(); 7035 7036 // Replace the target specific call node with the patchable intrinsic. 7037 SmallVector<SDValue, 8> Ops; 7038 7039 // Add the <id> and <numBytes> constants. 7040 SDValue IDVal = getValue(CS->getOperand(PatchPointOpers::IDPos)); 7041 Ops.push_back(DAG.getTargetConstant( 7042 cast<ConstantSDNode>(IDVal)->getZExtValue(), MVT::i64)); 7043 SDValue NBytesVal = getValue(CS->getOperand(PatchPointOpers::NBytesPos)); 7044 Ops.push_back(DAG.getTargetConstant( 7045 cast<ConstantSDNode>(NBytesVal)->getZExtValue(), MVT::i32)); 7046 7047 // Assume that the Callee is a constant address. 7048 // FIXME: handle function symbols in the future. 7049 Ops.push_back( 7050 DAG.getIntPtrConstant(cast<ConstantSDNode>(Callee)->getZExtValue(), 7051 /*isTarget=*/true)); 7052 7053 // Adjust <numArgs> to account for any arguments that have been passed on the 7054 // stack instead. 7055 // Call Node: Chain, Target, {Args}, RegMask, [Glue] 7056 unsigned NumCallRegArgs = Call->getNumOperands() - (HasGlue ? 4 : 3); 7057 NumCallRegArgs = IsAnyRegCC ? NumArgs : NumCallRegArgs; 7058 Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); 7059 7060 // Add the calling convention 7061 Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); 7062 7063 // Add the arguments we omitted previously. The register allocator should 7064 // place these in any free register. 7065 if (IsAnyRegCC) 7066 for (unsigned i = NumMetaOpers, e = NumMetaOpers + NumArgs; i != e; ++i) 7067 Ops.push_back(getValue(CS.getArgument(i))); 7068 7069 // Push the arguments from the call instruction up to the register mask. 7070 SDNode::op_iterator e = HasGlue ? Call->op_end()-2 : Call->op_end()-1; 7071 Ops.append(Call->op_begin() + 2, e); 7072 7073 // Push live variables for the stack map. 7074 addStackMapLiveVars(CS, NumMetaOpers + NumArgs, Ops, *this); 7075 7076 // Push the register mask info. 7077 if (HasGlue) 7078 Ops.push_back(*(Call->op_end()-2)); 7079 else 7080 Ops.push_back(*(Call->op_end()-1)); 7081 7082 // Push the chain (this is originally the first operand of the call, but 7083 // becomes now the last or second to last operand). 7084 Ops.push_back(*(Call->op_begin())); 7085 7086 // Push the glue flag (last operand). 7087 if (HasGlue) 7088 Ops.push_back(*(Call->op_end()-1)); 7089 7090 SDVTList NodeTys; 7091 if (IsAnyRegCC && HasDef) { 7092 // Create the return types based on the intrinsic definition 7093 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7094 SmallVector<EVT, 3> ValueVTs; 7095 ComputeValueVTs(TLI, CS->getType(), ValueVTs); 7096 assert(ValueVTs.size() == 1 && "Expected only one return value type."); 7097 7098 // There is always a chain and a glue type at the end 7099 ValueVTs.push_back(MVT::Other); 7100 ValueVTs.push_back(MVT::Glue); 7101 NodeTys = DAG.getVTList(ValueVTs); 7102 } else 7103 NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 7104 7105 // Replace the target specific call node with a PATCHPOINT node. 7106 MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, 7107 getCurSDLoc(), NodeTys, Ops); 7108 7109 // Update the NodeMap. 7110 if (HasDef) { 7111 if (IsAnyRegCC) 7112 setValue(CS.getInstruction(), SDValue(MN, 0)); 7113 else 7114 setValue(CS.getInstruction(), Result.first); 7115 } 7116 7117 // Fixup the consumers of the intrinsic. The chain and glue may be used in the 7118 // call sequence. Furthermore the location of the chain and glue can change 7119 // when the AnyReg calling convention is used and the intrinsic returns a 7120 // value. 7121 if (IsAnyRegCC && HasDef) { 7122 SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; 7123 SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; 7124 DAG.ReplaceAllUsesOfValuesWith(From, To, 2); 7125 } else 7126 DAG.ReplaceAllUsesWith(Call, MN); 7127 DAG.DeleteNode(Call); 7128 7129 // Inform the Frame Information that we have a patchpoint in this function. 7130 FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); 7131 } 7132 7133 /// Returns an AttributeSet representing the attributes applied to the return 7134 /// value of the given call. 7135 static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { 7136 SmallVector<Attribute::AttrKind, 2> Attrs; 7137 if (CLI.RetSExt) 7138 Attrs.push_back(Attribute::SExt); 7139 if (CLI.RetZExt) 7140 Attrs.push_back(Attribute::ZExt); 7141 if (CLI.IsInReg) 7142 Attrs.push_back(Attribute::InReg); 7143 7144 return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, 7145 Attrs); 7146 } 7147 7148 /// TargetLowering::LowerCallTo - This is the default LowerCallTo 7149 /// implementation, which just calls LowerCall. 7150 /// FIXME: When all targets are 7151 /// migrated to using LowerCall, this hook should be integrated into SDISel. 7152 std::pair<SDValue, SDValue> 7153 TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { 7154 // Handle the incoming return values from the call. 7155 CLI.Ins.clear(); 7156 Type *OrigRetTy = CLI.RetTy; 7157 SmallVector<EVT, 4> RetTys; 7158 SmallVector<uint64_t, 4> Offsets; 7159 ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); 7160 7161 SmallVector<ISD::OutputArg, 4> Outs; 7162 GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); 7163 7164 bool CanLowerReturn = 7165 this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), 7166 CLI.IsVarArg, Outs, CLI.RetTy->getContext()); 7167 7168 SDValue DemoteStackSlot; 7169 int DemoteStackIdx = -100; 7170 if (!CanLowerReturn) { 7171 // FIXME: equivalent assert? 7172 // assert(!CS.hasInAllocaArgument() && 7173 // "sret demotion is incompatible with inalloca"); 7174 uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); 7175 unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); 7176 MachineFunction &MF = CLI.DAG.getMachineFunction(); 7177 DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); 7178 Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); 7179 7180 DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); 7181 ArgListEntry Entry; 7182 Entry.Node = DemoteStackSlot; 7183 Entry.Ty = StackSlotPtrType; 7184 Entry.isSExt = false; 7185 Entry.isZExt = false; 7186 Entry.isInReg = false; 7187 Entry.isSRet = true; 7188 Entry.isNest = false; 7189 Entry.isByVal = false; 7190 Entry.isReturned = false; 7191 Entry.Alignment = Align; 7192 CLI.getArgs().insert(CLI.getArgs().begin(), Entry); 7193 CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); 7194 7195 // sret demotion isn't compatible with tail-calls, since the sret argument 7196 // points into the callers stack frame. 7197 CLI.IsTailCall = false; 7198 } else { 7199 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { 7200 EVT VT = RetTys[I]; 7201 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); 7202 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); 7203 for (unsigned i = 0; i != NumRegs; ++i) { 7204 ISD::InputArg MyFlags; 7205 MyFlags.VT = RegisterVT; 7206 MyFlags.ArgVT = VT; 7207 MyFlags.Used = CLI.IsReturnValueUsed; 7208 if (CLI.RetSExt) 7209 MyFlags.Flags.setSExt(); 7210 if (CLI.RetZExt) 7211 MyFlags.Flags.setZExt(); 7212 if (CLI.IsInReg) 7213 MyFlags.Flags.setInReg(); 7214 CLI.Ins.push_back(MyFlags); 7215 } 7216 } 7217 } 7218 7219 // Handle all of the outgoing arguments. 7220 CLI.Outs.clear(); 7221 CLI.OutVals.clear(); 7222 ArgListTy &Args = CLI.getArgs(); 7223 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 7224 SmallVector<EVT, 4> ValueVTs; 7225 ComputeValueVTs(*this, Args[i].Ty, ValueVTs); 7226 Type *FinalType = Args[i].Ty; 7227 if (Args[i].isByVal) 7228 FinalType = cast<PointerType>(Args[i].Ty)->getElementType(); 7229 bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters( 7230 FinalType, CLI.CallConv, CLI.IsVarArg); 7231 for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; 7232 ++Value) { 7233 EVT VT = ValueVTs[Value]; 7234 Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext()); 7235 SDValue Op = SDValue(Args[i].Node.getNode(), 7236 Args[i].Node.getResNo() + Value); 7237 ISD::ArgFlagsTy Flags; 7238 unsigned OriginalAlignment = getDataLayout()->getABITypeAlignment(ArgTy); 7239 7240 if (Args[i].isZExt) 7241 Flags.setZExt(); 7242 if (Args[i].isSExt) 7243 Flags.setSExt(); 7244 if (Args[i].isInReg) 7245 Flags.setInReg(); 7246 if (Args[i].isSRet) 7247 Flags.setSRet(); 7248 if (Args[i].isByVal) 7249 Flags.setByVal(); 7250 if (Args[i].isInAlloca) { 7251 Flags.setInAlloca(); 7252 // Set the byval flag for CCAssignFn callbacks that don't know about 7253 // inalloca. This way we can know how many bytes we should've allocated 7254 // and how many bytes a callee cleanup function will pop. If we port 7255 // inalloca to more targets, we'll have to add custom inalloca handling 7256 // in the various CC lowering callbacks. 7257 Flags.setByVal(); 7258 } 7259 if (Args[i].isByVal || Args[i].isInAlloca) { 7260 PointerType *Ty = cast<PointerType>(Args[i].Ty); 7261 Type *ElementTy = Ty->getElementType(); 7262 Flags.setByValSize(getDataLayout()->getTypeAllocSize(ElementTy)); 7263 // For ByVal, alignment should come from FE. BE will guess if this 7264 // info is not there but there are cases it cannot get right. 7265 unsigned FrameAlign; 7266 if (Args[i].Alignment) 7267 FrameAlign = Args[i].Alignment; 7268 else 7269 FrameAlign = getByValTypeAlignment(ElementTy); 7270 Flags.setByValAlign(FrameAlign); 7271 } 7272 if (Args[i].isNest) 7273 Flags.setNest(); 7274 if (NeedsRegBlock) 7275 Flags.setInConsecutiveRegs(); 7276 Flags.setOrigAlign(OriginalAlignment); 7277 7278 MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); 7279 unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); 7280 SmallVector<SDValue, 4> Parts(NumParts); 7281 ISD::NodeType ExtendKind = ISD::ANY_EXTEND; 7282 7283 if (Args[i].isSExt) 7284 ExtendKind = ISD::SIGN_EXTEND; 7285 else if (Args[i].isZExt) 7286 ExtendKind = ISD::ZERO_EXTEND; 7287 7288 // Conservatively only handle 'returned' on non-vectors for now 7289 if (Args[i].isReturned && !Op.getValueType().isVector()) { 7290 assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues && 7291 "unexpected use of 'returned'"); 7292 // Before passing 'returned' to the target lowering code, ensure that 7293 // either the register MVT and the actual EVT are the same size or that 7294 // the return value and argument are extended in the same way; in these 7295 // cases it's safe to pass the argument register value unchanged as the 7296 // return register value (although it's at the target's option whether 7297 // to do so) 7298 // TODO: allow code generation to take advantage of partially preserved 7299 // registers rather than clobbering the entire register when the 7300 // parameter extension method is not compatible with the return 7301 // extension method 7302 if ((NumParts * PartVT.getSizeInBits() == VT.getSizeInBits()) || 7303 (ExtendKind != ISD::ANY_EXTEND && 7304 CLI.RetSExt == Args[i].isSExt && CLI.RetZExt == Args[i].isZExt)) 7305 Flags.setReturned(); 7306 } 7307 7308 getCopyToParts(CLI.DAG, CLI.DL, Op, &Parts[0], NumParts, PartVT, 7309 CLI.CS ? CLI.CS->getInstruction() : nullptr, ExtendKind); 7310 7311 for (unsigned j = 0; j != NumParts; ++j) { 7312 // if it isn't first piece, alignment must be 1 7313 ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, 7314 i < CLI.NumFixedArgs, 7315 i, j*Parts[j].getValueType().getStoreSize()); 7316 if (NumParts > 1 && j == 0) 7317 MyFlags.Flags.setSplit(); 7318 else if (j != 0) 7319 MyFlags.Flags.setOrigAlign(1); 7320 7321 CLI.Outs.push_back(MyFlags); 7322 CLI.OutVals.push_back(Parts[j]); 7323 } 7324 7325 if (NeedsRegBlock && Value == NumValues - 1) 7326 CLI.Outs[CLI.Outs.size() - 1].Flags.setInConsecutiveRegsLast(); 7327 } 7328 } 7329 7330 SmallVector<SDValue, 4> InVals; 7331 CLI.Chain = LowerCall(CLI, InVals); 7332 7333 // Verify that the target's LowerCall behaved as expected. 7334 assert(CLI.Chain.getNode() && CLI.Chain.getValueType() == MVT::Other && 7335 "LowerCall didn't return a valid chain!"); 7336 assert((!CLI.IsTailCall || InVals.empty()) && 7337 "LowerCall emitted a return value for a tail call!"); 7338 assert((CLI.IsTailCall || InVals.size() == CLI.Ins.size()) && 7339 "LowerCall didn't emit the correct number of values!"); 7340 7341 // For a tail call, the return value is merely live-out and there aren't 7342 // any nodes in the DAG representing it. Return a special value to 7343 // indicate that a tail call has been emitted and no more Instructions 7344 // should be processed in the current block. 7345 if (CLI.IsTailCall) { 7346 CLI.DAG.setRoot(CLI.Chain); 7347 return std::make_pair(SDValue(), SDValue()); 7348 } 7349 7350 DEBUG(for (unsigned i = 0, e = CLI.Ins.size(); i != e; ++i) { 7351 assert(InVals[i].getNode() && 7352 "LowerCall emitted a null value!"); 7353 assert(EVT(CLI.Ins[i].VT) == InVals[i].getValueType() && 7354 "LowerCall emitted a value with the wrong type!"); 7355 }); 7356 7357 SmallVector<SDValue, 4> ReturnValues; 7358 if (!CanLowerReturn) { 7359 // The instruction result is the result of loading from the 7360 // hidden sret parameter. 7361 SmallVector<EVT, 1> PVTs; 7362 Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); 7363 7364 ComputeValueVTs(*this, PtrRetTy, PVTs); 7365 assert(PVTs.size() == 1 && "Pointers should fit in one register"); 7366 EVT PtrVT = PVTs[0]; 7367 7368 unsigned NumValues = RetTys.size(); 7369 ReturnValues.resize(NumValues); 7370 SmallVector<SDValue, 4> Chains(NumValues); 7371 7372 for (unsigned i = 0; i < NumValues; ++i) { 7373 SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, 7374 CLI.DAG.getConstant(Offsets[i], PtrVT)); 7375 SDValue L = CLI.DAG.getLoad( 7376 RetTys[i], CLI.DL, CLI.Chain, Add, 7377 MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, 7378 false, false, 1); 7379 ReturnValues[i] = L; 7380 Chains[i] = L.getValue(1); 7381 } 7382 7383 CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); 7384 } else { 7385 // Collect the legal value parts into potentially illegal values 7386 // that correspond to the original function's return values. 7387 ISD::NodeType AssertOp = ISD::DELETED_NODE; 7388 if (CLI.RetSExt) 7389 AssertOp = ISD::AssertSext; 7390 else if (CLI.RetZExt) 7391 AssertOp = ISD::AssertZext; 7392 unsigned CurReg = 0; 7393 for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { 7394 EVT VT = RetTys[I]; 7395 MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); 7396 unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); 7397 7398 ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], 7399 NumRegs, RegisterVT, VT, nullptr, 7400 AssertOp)); 7401 CurReg += NumRegs; 7402 } 7403 7404 // For a function returning void, there is no return value. We can't create 7405 // such a node, so we just return a null return value in that case. In 7406 // that case, nothing will actually look at the value. 7407 if (ReturnValues.empty()) 7408 return std::make_pair(SDValue(), CLI.Chain); 7409 } 7410 7411 SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, 7412 CLI.DAG.getVTList(RetTys), ReturnValues); 7413 return std::make_pair(Res, CLI.Chain); 7414 } 7415 7416 void TargetLowering::LowerOperationWrapper(SDNode *N, 7417 SmallVectorImpl<SDValue> &Results, 7418 SelectionDAG &DAG) const { 7419 SDValue Res = LowerOperation(SDValue(N, 0), DAG); 7420 if (Res.getNode()) 7421 Results.push_back(Res); 7422 } 7423 7424 SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 7425 llvm_unreachable("LowerOperation not implemented for this target!"); 7426 } 7427 7428 void 7429 SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { 7430 SDValue Op = getNonRegisterValue(V); 7431 assert((Op.getOpcode() != ISD::CopyFromReg || 7432 cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) && 7433 "Copy from a reg to the same reg!"); 7434 assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); 7435 7436 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7437 RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); 7438 SDValue Chain = DAG.getEntryNode(); 7439 7440 ISD::NodeType ExtendType = (FuncInfo.PreferredExtendType.find(V) == 7441 FuncInfo.PreferredExtendType.end()) 7442 ? ISD::ANY_EXTEND 7443 : FuncInfo.PreferredExtendType[V]; 7444 RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, nullptr, V, ExtendType); 7445 PendingExports.push_back(Chain); 7446 } 7447 7448 #include "llvm/CodeGen/SelectionDAGISel.h" 7449 7450 /// isOnlyUsedInEntryBlock - If the specified argument is only used in the 7451 /// entry block, return true. This includes arguments used by switches, since 7452 /// the switch may expand into multiple basic blocks. 7453 static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { 7454 // With FastISel active, we may be splitting blocks, so force creation 7455 // of virtual registers for all non-dead arguments. 7456 if (FastISel) 7457 return A->use_empty(); 7458 7459 const BasicBlock *Entry = A->getParent()->begin(); 7460 for (const User *U : A->users()) 7461 if (cast<Instruction>(U)->getParent() != Entry || isa<SwitchInst>(U)) 7462 return false; // Use not in entry block. 7463 7464 return true; 7465 } 7466 7467 void SelectionDAGISel::LowerArguments(const Function &F) { 7468 SelectionDAG &DAG = SDB->DAG; 7469 SDLoc dl = SDB->getCurSDLoc(); 7470 const DataLayout *DL = TLI->getDataLayout(); 7471 SmallVector<ISD::InputArg, 16> Ins; 7472 7473 if (!FuncInfo->CanLowerReturn) { 7474 // Put in an sret pointer parameter before all the other parameters. 7475 SmallVector<EVT, 1> ValueVTs; 7476 ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); 7477 7478 // NOTE: Assuming that a pointer will never break down to more than one VT 7479 // or one register. 7480 ISD::ArgFlagsTy Flags; 7481 Flags.setSRet(); 7482 MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); 7483 ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 7484 ISD::InputArg::NoArgIndex, 0); 7485 Ins.push_back(RetArg); 7486 } 7487 7488 // Set up the incoming argument description vector. 7489 unsigned Idx = 1; 7490 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); 7491 I != E; ++I, ++Idx) { 7492 SmallVector<EVT, 4> ValueVTs; 7493 ComputeValueVTs(*TLI, I->getType(), ValueVTs); 7494 bool isArgValueUsed = !I->use_empty(); 7495 unsigned PartBase = 0; 7496 Type *FinalType = I->getType(); 7497 if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) 7498 FinalType = cast<PointerType>(FinalType)->getElementType(); 7499 bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters( 7500 FinalType, F.getCallingConv(), F.isVarArg()); 7501 for (unsigned Value = 0, NumValues = ValueVTs.size(); 7502 Value != NumValues; ++Value) { 7503 EVT VT = ValueVTs[Value]; 7504 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext()); 7505 ISD::ArgFlagsTy Flags; 7506 unsigned OriginalAlignment = DL->getABITypeAlignment(ArgTy); 7507 7508 if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) 7509 Flags.setZExt(); 7510 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) 7511 Flags.setSExt(); 7512 if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) 7513 Flags.setInReg(); 7514 if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) 7515 Flags.setSRet(); 7516 if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) 7517 Flags.setByVal(); 7518 if (F.getAttributes().hasAttribute(Idx, Attribute::InAlloca)) { 7519 Flags.setInAlloca(); 7520 // Set the byval flag for CCAssignFn callbacks that don't know about 7521 // inalloca. This way we can know how many bytes we should've allocated 7522 // and how many bytes a callee cleanup function will pop. If we port 7523 // inalloca to more targets, we'll have to add custom inalloca handling 7524 // in the various CC lowering callbacks. 7525 Flags.setByVal(); 7526 } 7527 if (Flags.isByVal() || Flags.isInAlloca()) { 7528 PointerType *Ty = cast<PointerType>(I->getType()); 7529 Type *ElementTy = Ty->getElementType(); 7530 Flags.setByValSize(DL->getTypeAllocSize(ElementTy)); 7531 // For ByVal, alignment should be passed from FE. BE will guess if 7532 // this info is not there but there are cases it cannot get right. 7533 unsigned FrameAlign; 7534 if (F.getParamAlignment(Idx)) 7535 FrameAlign = F.getParamAlignment(Idx); 7536 else 7537 FrameAlign = TLI->getByValTypeAlignment(ElementTy); 7538 Flags.setByValAlign(FrameAlign); 7539 } 7540 if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) 7541 Flags.setNest(); 7542 if (NeedsRegBlock) 7543 Flags.setInConsecutiveRegs(); 7544 Flags.setOrigAlign(OriginalAlignment); 7545 7546 MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); 7547 unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); 7548 for (unsigned i = 0; i != NumRegs; ++i) { 7549 ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, 7550 Idx-1, PartBase+i*RegisterVT.getStoreSize()); 7551 if (NumRegs > 1 && i == 0) 7552 MyFlags.Flags.setSplit(); 7553 // if it isn't first piece, alignment must be 1 7554 else if (i > 0) 7555 MyFlags.Flags.setOrigAlign(1); 7556 Ins.push_back(MyFlags); 7557 } 7558 if (NeedsRegBlock && Value == NumValues - 1) 7559 Ins[Ins.size() - 1].Flags.setInConsecutiveRegsLast(); 7560 PartBase += VT.getStoreSize(); 7561 } 7562 } 7563 7564 // Call the target to set up the argument values. 7565 SmallVector<SDValue, 8> InVals; 7566 SDValue NewRoot = TLI->LowerFormalArguments( 7567 DAG.getRoot(), F.getCallingConv(), F.isVarArg(), Ins, dl, DAG, InVals); 7568 7569 // Verify that the target's LowerFormalArguments behaved as expected. 7570 assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && 7571 "LowerFormalArguments didn't return a valid chain!"); 7572 assert(InVals.size() == Ins.size() && 7573 "LowerFormalArguments didn't emit the correct number of values!"); 7574 DEBUG({ 7575 for (unsigned i = 0, e = Ins.size(); i != e; ++i) { 7576 assert(InVals[i].getNode() && 7577 "LowerFormalArguments emitted a null value!"); 7578 assert(EVT(Ins[i].VT) == InVals[i].getValueType() && 7579 "LowerFormalArguments emitted a value with the wrong type!"); 7580 } 7581 }); 7582 7583 // Update the DAG with the new chain value resulting from argument lowering. 7584 DAG.setRoot(NewRoot); 7585 7586 // Set up the argument values. 7587 unsigned i = 0; 7588 Idx = 1; 7589 if (!FuncInfo->CanLowerReturn) { 7590 // Create a virtual register for the sret pointer, and put in a copy 7591 // from the sret argument into it. 7592 SmallVector<EVT, 1> ValueVTs; 7593 ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); 7594 MVT VT = ValueVTs[0].getSimpleVT(); 7595 MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); 7596 ISD::NodeType AssertOp = ISD::DELETED_NODE; 7597 SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, 7598 RegVT, VT, nullptr, AssertOp); 7599 7600 MachineFunction& MF = SDB->DAG.getMachineFunction(); 7601 MachineRegisterInfo& RegInfo = MF.getRegInfo(); 7602 unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); 7603 FuncInfo->DemoteRegister = SRetReg; 7604 NewRoot = 7605 SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); 7606 DAG.setRoot(NewRoot); 7607 7608 // i indexes lowered arguments. Bump it past the hidden sret argument. 7609 // Idx indexes LLVM arguments. Don't touch it. 7610 ++i; 7611 } 7612 7613 for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; 7614 ++I, ++Idx) { 7615 SmallVector<SDValue, 4> ArgValues; 7616 SmallVector<EVT, 4> ValueVTs; 7617 ComputeValueVTs(*TLI, I->getType(), ValueVTs); 7618 unsigned NumValues = ValueVTs.size(); 7619 7620 // If this argument is unused then remember its value. It is used to generate 7621 // debugging information. 7622 if (I->use_empty() && NumValues) { 7623 SDB->setUnusedArgValue(I, InVals[i]); 7624 7625 // Also remember any frame index for use in FastISel. 7626 if (FrameIndexSDNode *FI = 7627 dyn_cast<FrameIndexSDNode>(InVals[i].getNode())) 7628 FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); 7629 } 7630 7631 for (unsigned Val = 0; Val != NumValues; ++Val) { 7632 EVT VT = ValueVTs[Val]; 7633 MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); 7634 unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); 7635 7636 if (!I->use_empty()) { 7637 ISD::NodeType AssertOp = ISD::DELETED_NODE; 7638 if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) 7639 AssertOp = ISD::AssertSext; 7640 else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) 7641 AssertOp = ISD::AssertZext; 7642 7643 ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], 7644 NumParts, PartVT, VT, 7645 nullptr, AssertOp)); 7646 } 7647 7648 i += NumParts; 7649 } 7650 7651 // We don't need to do anything else for unused arguments. 7652 if (ArgValues.empty()) 7653 continue; 7654 7655 // Note down frame index. 7656 if (FrameIndexSDNode *FI = 7657 dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode())) 7658 FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); 7659 7660 SDValue Res = DAG.getMergeValues(makeArrayRef(ArgValues.data(), NumValues), 7661 SDB->getCurSDLoc()); 7662 7663 SDB->setValue(I, Res); 7664 if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { 7665 if (LoadSDNode *LNode = 7666 dyn_cast<LoadSDNode>(Res.getOperand(0).getNode())) 7667 if (FrameIndexSDNode *FI = 7668 dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) 7669 FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); 7670 } 7671 7672 // If this argument is live outside of the entry block, insert a copy from 7673 // wherever we got it to the vreg that other BB's will reference it as. 7674 if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) { 7675 // If we can, though, try to skip creating an unnecessary vreg. 7676 // FIXME: This isn't very clean... it would be nice to make this more 7677 // general. It's also subtly incompatible with the hacks FastISel 7678 // uses with vregs. 7679 unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg(); 7680 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 7681 FuncInfo->ValueMap[I] = Reg; 7682 continue; 7683 } 7684 } 7685 if (!isOnlyUsedInEntryBlock(I, TM.Options.EnableFastISel)) { 7686 FuncInfo->InitializeRegForValue(I); 7687 SDB->CopyToExportRegsIfNeeded(I); 7688 } 7689 } 7690 7691 assert(i == InVals.size() && "Argument register count mismatch!"); 7692 7693 // Finally, if the target has anything special to do, allow it to do so. 7694 EmitFunctionEntryCode(); 7695 } 7696 7697 /// Handle PHI nodes in successor blocks. Emit code into the SelectionDAG to 7698 /// ensure constants are generated when needed. Remember the virtual registers 7699 /// that need to be added to the Machine PHI nodes as input. We cannot just 7700 /// directly add them, because expansion might result in multiple MBB's for one 7701 /// BB. As such, the start of the BB might correspond to a different MBB than 7702 /// the end. 7703 /// 7704 void 7705 SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { 7706 const TerminatorInst *TI = LLVMBB->getTerminator(); 7707 7708 SmallPtrSet<MachineBasicBlock *, 4> SuccsHandled; 7709 7710 // Check PHI nodes in successors that expect a value to be available from this 7711 // block. 7712 for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { 7713 const BasicBlock *SuccBB = TI->getSuccessor(succ); 7714 if (!isa<PHINode>(SuccBB->begin())) continue; 7715 MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; 7716 7717 // If this terminator has multiple identical successors (common for 7718 // switches), only handle each succ once. 7719 if (!SuccsHandled.insert(SuccMBB).second) 7720 continue; 7721 7722 MachineBasicBlock::iterator MBBI = SuccMBB->begin(); 7723 7724 // At this point we know that there is a 1-1 correspondence between LLVM PHI 7725 // nodes and Machine PHI nodes, but the incoming operands have not been 7726 // emitted yet. 7727 for (BasicBlock::const_iterator I = SuccBB->begin(); 7728 const PHINode *PN = dyn_cast<PHINode>(I); ++I) { 7729 // Ignore dead phi's. 7730 if (PN->use_empty()) continue; 7731 7732 // Skip empty types 7733 if (PN->getType()->isEmptyTy()) 7734 continue; 7735 7736 unsigned Reg; 7737 const Value *PHIOp = PN->getIncomingValueForBlock(LLVMBB); 7738 7739 if (const Constant *C = dyn_cast<Constant>(PHIOp)) { 7740 unsigned &RegOut = ConstantsOut[C]; 7741 if (RegOut == 0) { 7742 RegOut = FuncInfo.CreateRegs(C->getType()); 7743 CopyValueToVirtualRegister(C, RegOut); 7744 } 7745 Reg = RegOut; 7746 } else { 7747 DenseMap<const Value *, unsigned>::iterator I = 7748 FuncInfo.ValueMap.find(PHIOp); 7749 if (I != FuncInfo.ValueMap.end()) 7750 Reg = I->second; 7751 else { 7752 assert(isa<AllocaInst>(PHIOp) && 7753 FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) && 7754 "Didn't codegen value into a register!??"); 7755 Reg = FuncInfo.CreateRegs(PHIOp->getType()); 7756 CopyValueToVirtualRegister(PHIOp, Reg); 7757 } 7758 } 7759 7760 // Remember that this register needs to added to the machine PHI node as 7761 // the input for this MBB. 7762 SmallVector<EVT, 4> ValueVTs; 7763 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 7764 ComputeValueVTs(TLI, PN->getType(), ValueVTs); 7765 for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { 7766 EVT VT = ValueVTs[vti]; 7767 unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); 7768 for (unsigned i = 0, e = NumRegisters; i != e; ++i) 7769 FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); 7770 Reg += NumRegisters; 7771 } 7772 } 7773 } 7774 7775 ConstantsOut.clear(); 7776 } 7777 7778 /// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB 7779 /// is 0. 7780 MachineBasicBlock * 7781 SelectionDAGBuilder::StackProtectorDescriptor:: 7782 AddSuccessorMBB(const BasicBlock *BB, 7783 MachineBasicBlock *ParentMBB, 7784 bool IsLikely, 7785 MachineBasicBlock *SuccMBB) { 7786 // If SuccBB has not been created yet, create it. 7787 if (!SuccMBB) { 7788 MachineFunction *MF = ParentMBB->getParent(); 7789 MachineFunction::iterator BBI = ParentMBB; 7790 SuccMBB = MF->CreateMachineBasicBlock(BB); 7791 MF->insert(++BBI, SuccMBB); 7792 } 7793 // Add it as a successor of ParentMBB. 7794 ParentMBB->addSuccessor( 7795 SuccMBB, BranchProbabilityInfo::getBranchWeightStackProtector(IsLikely)); 7796 return SuccMBB; 7797 } 7798 7799 MachineBasicBlock *SelectionDAGBuilder::NextBlock(MachineBasicBlock *MBB) { 7800 MachineFunction::iterator I = MBB; 7801 if (++I == FuncInfo.MF->end()) 7802 return nullptr; 7803 return I; 7804 } 7805