1 //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the cost model analysis. It provides a very basic cost 11 // estimation for LLVM-IR. This analysis uses the services of the codegen 12 // to approximate the cost of any IR instruction when lowered to machine 13 // instructions. The cost results are unit-less and the cost number represents 14 // the throughput of the machine assuming that all loads hit the cache, all 15 // branches are predicted, etc. The cost numbers can be added in order to 16 // compare two or more transformation alternatives. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/Analysis/Passes.h" 22 #include "llvm/Analysis/TargetTransformInfo.h" 23 #include "llvm/Analysis/VectorUtils.h" 24 #include "llvm/IR/Function.h" 25 #include "llvm/IR/Instructions.h" 26 #include "llvm/IR/IntrinsicInst.h" 27 #include "llvm/IR/Value.h" 28 #include "llvm/Pass.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/raw_ostream.h" 32 using namespace llvm; 33 34 #define CM_NAME "cost-model" 35 #define DEBUG_TYPE CM_NAME 36 37 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), 38 cl::Hidden, 39 cl::desc("Recognize reduction patterns.")); 40 41 namespace { 42 class CostModelAnalysis : public FunctionPass { 43 44 public: 45 static char ID; // Class identification, replacement for typeinfo 46 CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { 47 initializeCostModelAnalysisPass( 48 *PassRegistry::getPassRegistry()); 49 } 50 51 /// Returns the expected cost of the instruction. 52 /// Returns -1 if the cost is unknown. 53 /// Note, this method does not cache the cost calculation and it 54 /// can be expensive in some cases. 55 unsigned getInstructionCost(const Instruction *I) const; 56 57 private: 58 void getAnalysisUsage(AnalysisUsage &AU) const override; 59 bool runOnFunction(Function &F) override; 60 void print(raw_ostream &OS, const Module*) const override; 61 62 /// The function that we analyze. 63 Function *F; 64 /// Target information. 65 const TargetTransformInfo *TTI; 66 }; 67 } // End of anonymous namespace 68 69 // Register this pass. 70 char CostModelAnalysis::ID = 0; 71 static const char cm_name[] = "Cost Model Analysis"; 72 INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) 73 INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) 74 75 FunctionPass *llvm::createCostModelAnalysisPass() { 76 return new CostModelAnalysis(); 77 } 78 79 void 80 CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 81 AU.setPreservesAll(); 82 } 83 84 bool 85 CostModelAnalysis::runOnFunction(Function &F) { 86 this->F = &F; 87 auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 88 TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr; 89 90 return false; 91 } 92 93 static bool isReverseVectorMask(ArrayRef<int> Mask) { 94 for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) 95 if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i)) 96 return false; 97 return true; 98 } 99 100 static bool isSingleSourceVectorMask(ArrayRef<int> Mask) { 101 bool Vec0 = false; 102 bool Vec1 = false; 103 for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) { 104 if (Mask[i] >= 0) { 105 if ((unsigned)Mask[i] >= NumVecElts) 106 Vec1 = true; 107 else 108 Vec0 = true; 109 } 110 } 111 return !(Vec0 && Vec1); 112 } 113 114 static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) { 115 for (unsigned i = 0; i < Mask.size(); ++i) 116 if (Mask[i] > 0) 117 return false; 118 return true; 119 } 120 121 static bool isAlternateVectorMask(ArrayRef<int> Mask) { 122 bool isAlternate = true; 123 unsigned MaskSize = Mask.size(); 124 125 // Example: shufflevector A, B, <0,5,2,7> 126 for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { 127 if (Mask[i] < 0) 128 continue; 129 isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); 130 } 131 132 if (isAlternate) 133 return true; 134 135 isAlternate = true; 136 // Example: shufflevector A, B, <4,1,6,3> 137 for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { 138 if (Mask[i] < 0) 139 continue; 140 isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); 141 } 142 143 return isAlternate; 144 } 145 146 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { 147 TargetTransformInfo::OperandValueKind OpInfo = 148 TargetTransformInfo::OK_AnyValue; 149 150 // Check for a splat of a constant or for a non uniform vector of constants. 151 if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { 152 OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; 153 if (cast<Constant>(V)->getSplatValue() != nullptr) 154 OpInfo = TargetTransformInfo::OK_UniformConstantValue; 155 } 156 157 // Check for a splat of a uniform value. This is not loop aware, so return 158 // true only for the obviously uniform cases (argument, globalvalue) 159 const Value *Splat = getSplatValue(V); 160 if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat))) 161 OpInfo = TargetTransformInfo::OK_UniformValue; 162 163 return OpInfo; 164 } 165 166 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, 167 unsigned Level) { 168 // We don't need a shuffle if we just want to have element 0 in position 0 of 169 // the vector. 170 if (!SI && Level == 0 && IsLeft) 171 return true; 172 else if (!SI) 173 return false; 174 175 SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); 176 177 // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether 178 // we look at the left or right side. 179 for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) 180 Mask[i] = val; 181 182 SmallVector<int, 16> ActualMask = SI->getShuffleMask(); 183 return Mask == ActualMask; 184 } 185 186 static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, 187 unsigned Level, unsigned NumLevels) { 188 // Match one level of pairwise operations. 189 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 190 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 191 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 192 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 193 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 194 if (BinOp == nullptr) 195 return false; 196 197 assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); 198 199 unsigned Opcode = BinOp->getOpcode(); 200 Value *L = BinOp->getOperand(0); 201 Value *R = BinOp->getOperand(1); 202 203 ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); 204 if (!LS && Level) 205 return false; 206 ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); 207 if (!RS && Level) 208 return false; 209 210 // On level 0 we can omit one shufflevector instruction. 211 if (!Level && !RS && !LS) 212 return false; 213 214 // Shuffle inputs must match. 215 Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; 216 Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; 217 Value *NextLevelOp = nullptr; 218 if (NextLevelOpR && NextLevelOpL) { 219 // If we have two shuffles their operands must match. 220 if (NextLevelOpL != NextLevelOpR) 221 return false; 222 223 NextLevelOp = NextLevelOpL; 224 } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { 225 // On the first level we can omit the shufflevector <0, undef,...>. So the 226 // input to the other shufflevector <1, undef> must match with one of the 227 // inputs to the current binary operation. 228 // Example: 229 // %NextLevelOpL = shufflevector %R, <1, undef ...> 230 // %BinOp = fadd %NextLevelOpL, %R 231 if (NextLevelOpL && NextLevelOpL != R) 232 return false; 233 else if (NextLevelOpR && NextLevelOpR != L) 234 return false; 235 236 NextLevelOp = NextLevelOpL ? R : L; 237 } else 238 return false; 239 240 // Check that the next levels binary operation exists and matches with the 241 // current one. 242 BinaryOperator *NextLevelBinOp = nullptr; 243 if (Level + 1 != NumLevels) { 244 if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) 245 return false; 246 else if (NextLevelBinOp->getOpcode() != Opcode) 247 return false; 248 } 249 250 // Shuffle mask for pairwise operation must match. 251 if (matchPairwiseShuffleMask(LS, true, Level)) { 252 if (!matchPairwiseShuffleMask(RS, false, Level)) 253 return false; 254 } else if (matchPairwiseShuffleMask(RS, true, Level)) { 255 if (!matchPairwiseShuffleMask(LS, false, Level)) 256 return false; 257 } else 258 return false; 259 260 if (++Level == NumLevels) 261 return true; 262 263 // Match next level. 264 return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); 265 } 266 267 static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, 268 unsigned &Opcode, Type *&Ty) { 269 if (!EnableReduxCost) 270 return false; 271 272 // Need to extract the first element. 273 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 274 unsigned Idx = ~0u; 275 if (CI) 276 Idx = CI->getZExtValue(); 277 if (Idx != 0) 278 return false; 279 280 BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 281 if (!RdxStart) 282 return false; 283 284 Type *VecTy = ReduxRoot->getOperand(0)->getType(); 285 unsigned NumVecElems = VecTy->getVectorNumElements(); 286 if (!isPowerOf2_32(NumVecElems)) 287 return false; 288 289 // We look for a sequence of shuffle,shuffle,add triples like the following 290 // that builds a pairwise reduction tree. 291 // 292 // (X0, X1, X2, X3) 293 // (X0 + X1, X2 + X3, undef, undef) 294 // ((X0 + X1) + (X2 + X3), undef, undef, undef) 295 // 296 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 297 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 298 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 299 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 300 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 301 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 302 // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> 303 // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 304 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 305 // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 306 // %r = extractelement <4 x float> %bin.rdx8, i32 0 307 if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) 308 return false; 309 310 Opcode = RdxStart->getOpcode(); 311 Ty = VecTy; 312 313 return true; 314 } 315 316 static std::pair<Value *, ShuffleVectorInst *> 317 getShuffleAndOtherOprd(BinaryOperator *B) { 318 319 Value *L = B->getOperand(0); 320 Value *R = B->getOperand(1); 321 ShuffleVectorInst *S = nullptr; 322 323 if ((S = dyn_cast<ShuffleVectorInst>(L))) 324 return std::make_pair(R, S); 325 326 S = dyn_cast<ShuffleVectorInst>(R); 327 return std::make_pair(L, S); 328 } 329 330 static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, 331 unsigned &Opcode, Type *&Ty) { 332 if (!EnableReduxCost) 333 return false; 334 335 // Need to extract the first element. 336 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 337 unsigned Idx = ~0u; 338 if (CI) 339 Idx = CI->getZExtValue(); 340 if (Idx != 0) 341 return false; 342 343 BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 344 if (!RdxStart) 345 return false; 346 unsigned RdxOpcode = RdxStart->getOpcode(); 347 348 Type *VecTy = ReduxRoot->getOperand(0)->getType(); 349 unsigned NumVecElems = VecTy->getVectorNumElements(); 350 if (!isPowerOf2_32(NumVecElems)) 351 return false; 352 353 // We look for a sequence of shuffles and adds like the following matching one 354 // fadd, shuffle vector pair at a time. 355 // 356 // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, 357 // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 358 // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf 359 // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, 360 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 361 // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 362 // %r = extractelement <4 x float> %bin.rdx8, i32 0 363 364 unsigned MaskStart = 1; 365 Value *RdxOp = RdxStart; 366 SmallVector<int, 32> ShuffleMask(NumVecElems, 0); 367 unsigned NumVecElemsRemain = NumVecElems; 368 while (NumVecElemsRemain - 1) { 369 // Check for the right reduction operation. 370 BinaryOperator *BinOp; 371 if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) 372 return false; 373 if (BinOp->getOpcode() != RdxOpcode) 374 return false; 375 376 Value *NextRdxOp; 377 ShuffleVectorInst *Shuffle; 378 std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); 379 380 // Check the current reduction operation and the shuffle use the same value. 381 if (Shuffle == nullptr) 382 return false; 383 if (Shuffle->getOperand(0) != NextRdxOp) 384 return false; 385 386 // Check that shuffle masks matches. 387 for (unsigned j = 0; j != MaskStart; ++j) 388 ShuffleMask[j] = MaskStart + j; 389 // Fill the rest of the mask with -1 for undef. 390 std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); 391 392 SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 393 if (ShuffleMask != Mask) 394 return false; 395 396 RdxOp = NextRdxOp; 397 NumVecElemsRemain /= 2; 398 MaskStart *= 2; 399 } 400 401 Opcode = RdxOpcode; 402 Ty = VecTy; 403 return true; 404 } 405 406 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { 407 if (!TTI) 408 return -1; 409 410 switch (I->getOpcode()) { 411 case Instruction::GetElementPtr: 412 return TTI->getUserCost(I); 413 414 case Instruction::Ret: 415 case Instruction::PHI: 416 case Instruction::Br: { 417 return TTI->getCFInstrCost(I->getOpcode()); 418 } 419 case Instruction::Add: 420 case Instruction::FAdd: 421 case Instruction::Sub: 422 case Instruction::FSub: 423 case Instruction::Mul: 424 case Instruction::FMul: 425 case Instruction::UDiv: 426 case Instruction::SDiv: 427 case Instruction::FDiv: 428 case Instruction::URem: 429 case Instruction::SRem: 430 case Instruction::FRem: 431 case Instruction::Shl: 432 case Instruction::LShr: 433 case Instruction::AShr: 434 case Instruction::And: 435 case Instruction::Or: 436 case Instruction::Xor: { 437 TargetTransformInfo::OperandValueKind Op1VK = 438 getOperandInfo(I->getOperand(0)); 439 TargetTransformInfo::OperandValueKind Op2VK = 440 getOperandInfo(I->getOperand(1)); 441 return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, 442 Op2VK); 443 } 444 case Instruction::Select: { 445 const SelectInst *SI = cast<SelectInst>(I); 446 Type *CondTy = SI->getCondition()->getType(); 447 return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); 448 } 449 case Instruction::ICmp: 450 case Instruction::FCmp: { 451 Type *ValTy = I->getOperand(0)->getType(); 452 return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); 453 } 454 case Instruction::Store: { 455 const StoreInst *SI = cast<StoreInst>(I); 456 Type *ValTy = SI->getValueOperand()->getType(); 457 return TTI->getMemoryOpCost(I->getOpcode(), ValTy, 458 SI->getAlignment(), 459 SI->getPointerAddressSpace()); 460 } 461 case Instruction::Load: { 462 const LoadInst *LI = cast<LoadInst>(I); 463 return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), 464 LI->getAlignment(), 465 LI->getPointerAddressSpace()); 466 } 467 case Instruction::ZExt: 468 case Instruction::SExt: 469 case Instruction::FPToUI: 470 case Instruction::FPToSI: 471 case Instruction::FPExt: 472 case Instruction::PtrToInt: 473 case Instruction::IntToPtr: 474 case Instruction::SIToFP: 475 case Instruction::UIToFP: 476 case Instruction::Trunc: 477 case Instruction::FPTrunc: 478 case Instruction::BitCast: 479 case Instruction::AddrSpaceCast: { 480 Type *SrcTy = I->getOperand(0)->getType(); 481 return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); 482 } 483 case Instruction::ExtractElement: { 484 const ExtractElementInst * EEI = cast<ExtractElementInst>(I); 485 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); 486 unsigned Idx = -1; 487 if (CI) 488 Idx = CI->getZExtValue(); 489 490 // Try to match a reduction sequence (series of shufflevector and vector 491 // adds followed by a extractelement). 492 unsigned ReduxOpCode; 493 Type *ReduxType; 494 495 if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) 496 return TTI->getReductionCost(ReduxOpCode, ReduxType, false); 497 else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) 498 return TTI->getReductionCost(ReduxOpCode, ReduxType, true); 499 500 return TTI->getVectorInstrCost(I->getOpcode(), 501 EEI->getOperand(0)->getType(), Idx); 502 } 503 case Instruction::InsertElement: { 504 const InsertElementInst * IE = cast<InsertElementInst>(I); 505 ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); 506 unsigned Idx = -1; 507 if (CI) 508 Idx = CI->getZExtValue(); 509 return TTI->getVectorInstrCost(I->getOpcode(), 510 IE->getType(), Idx); 511 } 512 case Instruction::ShuffleVector: { 513 const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); 514 Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); 515 unsigned NumVecElems = VecTypOp0->getVectorNumElements(); 516 SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 517 518 if (NumVecElems == Mask.size()) { 519 if (isReverseVectorMask(Mask)) 520 return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 521 0, nullptr); 522 if (isAlternateVectorMask(Mask)) 523 return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, 524 VecTypOp0, 0, nullptr); 525 526 if (isZeroEltBroadcastVectorMask(Mask)) 527 return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, 528 VecTypOp0, 0, nullptr); 529 530 if (isSingleSourceVectorMask(Mask)) 531 return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc, 532 VecTypOp0, 0, nullptr); 533 534 return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, 535 VecTypOp0, 0, nullptr); 536 } 537 538 return -1; 539 } 540 case Instruction::Call: 541 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 542 SmallVector<Value *, 4> Args; 543 for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) 544 Args.push_back(II->getArgOperand(J)); 545 546 FastMathFlags FMF; 547 if (auto *FPMO = dyn_cast<FPMathOperator>(II)) 548 FMF = FPMO->getFastMathFlags(); 549 550 return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), 551 Args, FMF); 552 } 553 return -1; 554 default: 555 // We don't have any information on this instruction. 556 return -1; 557 } 558 } 559 560 void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { 561 if (!F) 562 return; 563 564 for (BasicBlock &B : *F) { 565 for (Instruction &Inst : B) { 566 unsigned Cost = getInstructionCost(&Inst); 567 if (Cost != (unsigned)-1) 568 OS << "Cost Model: Found an estimated cost of " << Cost; 569 else 570 OS << "Cost Model: Unknown cost"; 571 572 OS << " for instruction: " << Inst << "\n"; 573 } 574 } 575 } 576