Lines Matching +full:num +full:- +full:vectors
1 //===- LowerMatrixIntrinsics.cpp - Lower matrix intrinsics -----*- C++ -*-===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
13 // * Support more cases, e.g. multiply-add, multiply-sub, operands/results
15 // * Improve cost-modeling, e.g. choose different number of rows/columns
18 //===----------------------------------------------------------------------===//
53 #define DEBUG_TYPE "lower-matrix-intrinsics"
56 FuseMatrix("fuse-matrix", cl::init(true), cl::Hidden,
58 // TODO: Allow and use non-square tiles.
60 "fuse-matrix-tile-size", cl::init(4), cl::Hidden,
62 "Tile size for matrix instruction fusion using square-shaped tiles."));
63 static cl::opt<bool> TileUseLoops("fuse-matrix-use-loops", cl::init(false),
67 "force-fuse-matrix", cl::init(false), cl::Hidden,
70 "matrix-allow-contract", cl::init(false), cl::Hidden,
75 VerifyShapeInfo("verify-matrix-shapes", cl::Hidden,
82 "matrix-default-layout", cl::init(MatrixLayoutTy::ColumnMajor),
84 cl::values(clEnumValN(MatrixLayoutTy::ColumnMajor, "column-major",
85 "Use column-major layout"),
86 clEnumValN(MatrixLayoutTy::RowMajor, "row-major",
87 "Use row-major layout")));
89 static cl::opt<bool> PrintAfterTransposeOpt("matrix-print-after-transpose-opt",
97 return cast<DILocalScope>(Scope)->getSubprogram();
106 if (!Inst->use_empty())
110 Inst->eraseFromParent();
117 return SV->isZeroEltSplat();
137 // assuming \p Stride elements between the starts of two consecutive vectors.
139 // For column-major matrixes, the function computes the address of a column
140 // vectors and \p NumElements must be set to the number of elements in a column
141 // (= number of rows of the matrix). For row-major matrixes, the function
145 // Consider a 4x4 matrix in column-major layout like below
153 // To compute the column addresses for a 2x3 sub-matrix at row 1 and column 1,
156 // of the sub-matrix.
158 // Column 0: computeVectorAddr(Base, 0 (column), 4 (stride), 2 (num rows), ..)
159 // -> just returns Base
160 // Column 1: computeVectorAddr(Base, 1 (column), 4 (stride), 2 (num rows), ..)
161 // -> returns Base + (1 * 4)
162 // Column 2: computeVectorAddr(Base, 2 (column), 4 (stride), 2 (num rows), ..)
163 // -> returns Base + (2 * 4)
180 cast<ConstantInt>(Stride)->getZExtValue() >= NumElements) &&
188 if (isa<ConstantInt>(VecStart) && cast<ConstantInt>(VecStart)->isZero())
208 : ShapeInfo(cast<ConstantInt>(NumRows)->getZExtValue(),
209 cast<ConstantInt>(NumColumns)->getZExtValue()) {}
216 /// Returns true if shape-information is defined, meaning both dimensions
245 switch (I->getOpcode()) {
285 return OpShape->second;
290 for (auto &Op : I->operands()) {
293 return OpShape->second;
304 /// 2. Lower instructions with shape information (assuming column-major layout).
305 /// The lowering works similarly using row-major layout.
306 /// 2.1. Get column vectors for each argument. If we already lowered the
307 /// definition of an argument, use the produced column vectors directly.
309 /// a set of column vectors,
311 /// yields a set of column vectors containing result matrix. Note that we
353 /// Wrapper class representing a matrix as a set of vectors, either in row or
354 /// column major layout. All vectors must have the same vector type.
356 SmallVector<Value *, 16> Vectors;
364 MatrixTy(ArrayRef<Value *> Vectors)
365 : Vectors(Vectors.begin(), Vectors.end()),
376 Value *getVector(unsigned i) const { return Vectors[i]; }
378 assert(isColumnMajor() && "only supported for column-major matrixes");
379 return Vectors[i];
382 assert(!isColumnMajor() && "only supported for row-major matrixes");
383 return Vectors[i];
386 void setVector(unsigned i, Value *V) { Vectors[i] = V; }
388 Type *getElementType() const { return getVectorTy()->getElementType(); }
398 return Vectors.size();
400 assert(Vectors.size() > 0 && "Cannot call getNumRows without columns");
401 return cast<FixedVectorType>(Vectors[0]->getType())->getNumElements();
406 assert(Vectors.size() > 0 && "Cannot call getNumRows without columns");
407 return cast<FixedVectorType>(Vectors[0]->getType())->getNumElements();
409 return Vectors.size();
412 void addVector(Value *V) { Vectors.push_back(V); }
414 assert(isColumnMajor() && "only supported for column-major matrixes");
419 return cast<VectorType>(Vectors[0]->getType());
424 "columns() only supported for column-major matrixes");
425 return make_range(Vectors.begin(), Vectors.end());
428 iterator_range<SmallVector<Value *, 8>::iterator> vectors() {
429 return make_range(Vectors.begin(), Vectors.end());
432 /// Embed the vectors of the matrix into a flat vector by concatenating
435 return Vectors.size() == 1 ? Vectors[0]
436 : concatenateVectors(Builder, Vectors);
476 /// matrix is column-major, the result vector is extracted from a column
481 assert(cast<FixedVectorType>(Vec->getType())->getNumElements() >=
496 /// sub-passes like optimizeTransposes performs RAUW the map stays
497 /// up-to-date.
513 FMF = Inst->getFastMathFlags();
529 return getNumOps(VT->getScalarType(),
530 cast<FixedVectorType>(VT)->getNumElements());
541 return std::ceil((ST->getPrimitiveSizeInBits() * N).getFixedValue() /
547 /// Return the set of vectors that a matrix value is lowered to.
551 /// into vectors.
554 VectorType *VType = dyn_cast<VectorType>(MatrixVal->getType());
556 assert(cast<FixedVectorType>(VType)->getNumElements() ==
562 // information. If there is a mis-match, embed the result in a flat
566 MatrixTy &M = Found->second;
578 MaskStart < cast<FixedVectorType>(VType)->getNumElements();
598 if (VerifyShapeInfo && (SIter->second.NumRows != Shape.NumRows ||
599 SIter->second.NumColumns != Shape.NumColumns)) {
600 errs() << "Conflicting shapes (" << SIter->second.NumRows << "x"
601 << SIter->second.NumColumns << " vs " << Shape.NumRows << "x"
608 << SIter->second.NumRows << " "
609 << SIter->second.NumColumns << " for " << *V << "\n");
628 switch (II->getIntrinsicID()) {
650 LLVM_DEBUG(dbgs() << "Forward-propagate shapes:\n");
661 for (auto *User : Inst->users())
685 LLVM_DEBUG(dbgs() << "Backward-propagate shapes:\n");
722 // Nothing to do. We forward-propagated to this so we would just
727 for (Use &U : cast<Instruction>(V)->operands()) {
736 for (User *U : WorkList[I]->users())
743 /// (Op0 op Op1)^T -> Op0^T op Op1^T
752 Op0, Shape0.NumRows, Shape0.NumColumns, Op0->getName() + "_t");
757 Op1, Shape1.NumRows, Shape1.NumColumns, Op1->getName() + "_t");
770 ShapeMap.insert({New, S->second});
775 /// Sink a top-level transpose inside matmuls and adds.
799 // k^T -> k
806 // (A * B)^t -> B^t * A^t
826 // (A * k)^t -> A^t * k
836 bool IsFP = I.getType()->isFPOrFPVectorTy();
849 // (A + B)^t -> A^t + B^t
856 bool IsFP = I.getType()->isFPOrFPVectorTy();
878 if (A->use_empty())
879 cast<Instruction>(A)->eraseFromParent();
880 if (A != B && B->use_empty())
881 cast<Instruction>(B)->eraseFromParent();
886 // A^t * B ^t -> (B * A)^t
895 BT, AT, C->getZExtValue(), K->getZExtValue(), R->getZExtValue());
897 Instruction *NewInst = Builder.CreateMatrixTranspose(M, C->getZExtValue(),
898 R->getZExtValue());
902 // A^t + B ^t -> (A + B)^t. Pick rows and columns from first transpose. If
915 Add, R->getZExtValue(), C->getZExtValue(), "mfadd_t");
961 switch (II->getIntrinsicID()) {
1049 // having to update as many def-use and use-def chains.
1059 for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
1062 U.set(PoisonValue::get(Inst->getType()));
1064 Inst->eraseFromParent();
1080 if (!Inst->getCalledFunction() || !Inst->getCalledFunction()->isIntrinsic())
1083 switch (Inst->getCalledFunction()->getIntrinsicID()) {
1105 /// non-ConstantInt strides, return the common alignment of the initial
1116 ConstStride->getZExtValue() * ElementSizeInBits / 8;
1123 /// vectors.
1127 Type *EltTy = VType->getElementType();
1133 EltPtr, Builder.getIntN(Stride->getType()->getScalarSizeInBits(), I),
1145 /// Loads a sub-matrix with shape \p ResultShape from a \p R x \p C matrix,
1169 loadMatrix(Inst->getType(), Ptr, Align, Stride, IsVolatile,
1179 "Intrinsic only supports column-major layout!");
1180 Value *Ptr = Inst->getArgOperand(0);
1181 Value *Stride = Inst->getArgOperand(1);
1182 LowerLoad(Inst, Ptr, Inst->getParamAlign(0), Stride,
1183 cast<ConstantInt>(Inst->getArgOperand(2))->isOne(),
1184 {Inst->getArgOperand(3), Inst->getArgOperand(4)});
1187 /// Stores a sub-matrix \p StoreVal into the \p R x \p C matrix starting at \p
1204 /// vectors.
1210 for (auto Vec : enumerate(StoreVal.vectors())) {
1213 Builder.getIntN(Stride->getType()->getScalarSizeInBits(),
1215 Stride, StoreVal.getStride(), VType->getElementType(), Builder);
1218 VType->getElementType(),
1232 storeMatrix(Matrix->getType(), StoreVal, Ptr, A, Stride,
1242 "Intrinsic only supports column-major layout!");
1243 Value *Matrix = Inst->getArgOperand(0);
1244 Value *Ptr = Inst->getArgOperand(1);
1245 Value *Stride = Inst->getArgOperand(2);
1246 LowerStore(Inst, Matrix, Ptr, Inst->getParamAlign(1), Stride,
1247 cast<ConstantInt>(Inst->getArgOperand(3))->isOne(),
1248 {Inst->getArgOperand(4), Inst->getArgOperand(5)});
1251 // Set elements I..I+NumElts-1 to Block
1257 cast<FixedVectorType>(Block->getType())->getNumElements();
1258 unsigned NumElts = cast<FixedVectorType>(Col->getType())->getNumElements();
1262 Block, createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts));
1272 cast<FixedVectorType>(Col->getType())->getNumElements();
1274 Mask.push_back(i - I + VecNumElts);
1285 NumComputeOps += getNumOps(A->getType());
1294 Func.getParent(), Intrinsic::fmuladd, A->getType());
1297 NumComputeOps += getNumOps(A->getType());
1302 NumComputeOps += getNumOps(A->getType());
1320 for (Use &U : llvm::make_early_inc_range(Inst->uses())) {
1329 /// Special case for MatMul lowering. Prevents scalar loads of row-major
1330 /// vectors Lowers to vector reduction add instead of sequential add if
1338 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
1339 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
1344 Value *LHS = MatMul->getArgOperand(0);
1345 Value *RHS = MatMul->getArgOperand(1);
1347 Type *ElementType = cast<VectorType>(LHS->getType())->getElementType();
1348 bool IsIntVec = ElementType->isIntegerTy();
1366 // dot-product lowering.
1374 FixedVectorType *VecTy = cast<FixedVectorType>(Op->getType());
1375 Type *EltTy = VecTy->getElementType();
1389 TTI.getArithmeticInstrCost(cast<Instruction>(Op)->getOpcode(),
1393 cast<Instruction>(Op)->getOpcode(), VecTy);
1394 return NewCost - OriginalCost;
1403 EmbedCost -=
1413 return TTI.getMemoryOpCost(Instruction::Load, VecTy, Align(1), 0) -
1437 WorkList.append(I->op_begin(), I->op_end());
1445 AddOpCode, cast<VectorType>(LHS->getType()),
1447 TTI.getArithmeticInstrCost(MulOpCode, LHS->getType());
1450 (LShape.NumColumns - 1) +
1453 if ((LHSCost + ReductionCost - SequentialAddCost) > InstructionCost(0))
1461 // for row vectors (LowerLoad results in scalar loads and shufflevectors
1476 auto *NewLoad = Builder.CreateLoad(Op->getType(), Arg);
1477 Op->replaceAllUsesWith(NewLoad);
1478 cast<Instruction>(Op)->eraseFromParent();
1483 Op->replaceAllUsesWith(Arg);
1491 LHS = MatMul->getArgOperand(0);
1502 ConstantFP::get(cast<VectorType>(LHS->getType())->getElementType(),
1505 cast<Instruction>(Result)->setFastMathFlags(FMF);
1509 Result = Builder.CreateInsertElement(PoisonValue::get(MatMul->getType()),
1511 MatMul->replaceAllUsesWith(Result);
1516 /// Compute \p Result += \p A * \p B for input matrices with left-associating
1520 /// This is the first operands with row-major and the second with
1521 /// column-major. If \p IsScalarMatrixTransposed we assume the appropriate
1529 Result.getElementType()->getPrimitiveSizeInBits().getFixedValue(),
1535 bool IsFP = Result.getElementType()->isFloatingPointTy();
1613 if (AA->isNoAlias(LoadLoc, StoreLoc))
1614 return Load->getPointerOperand();
1620 BasicBlock *Check0 = MatMul->getParent();
1626 DTUpdates.push_back({DT->Delete, Check0, Succ});
1629 SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
1632 SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
1635 SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
1642 Check0->getTerminator()->eraseFromParent();
1644 Type *IntPtrTy = Builder.getIntPtrTy(Load->getDataLayout());
1658 Check1->getTerminator()->eraseFromParent();
1659 Builder.SetInsertPoint(Check1, Check1->begin());
1667 Builder.SetInsertPoint(Copy, Copy->begin());
1668 auto *VT = cast<FixedVectorType>(Load->getType());
1671 auto *ArrayTy = ArrayType::get(VT->getElementType(), VT->getNumElements());
1673 Builder.CreateAlloca(ArrayTy, Load->getPointerAddressSpace());
1675 Builder.CreateMemCpy(Alloca, Alloca->getAlign(), Load->getPointerOperand(),
1676 Load->getAlign(), LoadLoc.Size.getValue());
1677 Builder.SetInsertPoint(Fusion, Fusion->begin());
1678 PHINode *PHI = Builder.CreatePHI(Load->getPointerOperandType(), 3);
1679 PHI->addIncoming(Load->getPointerOperand(), Check0);
1680 PHI->addIncoming(Load->getPointerOperand(), Check1);
1681 PHI->addIncoming(Alloca, Copy);
1684 DTUpdates.push_back({DT->Insert, Check0, Check1});
1685 DTUpdates.push_back({DT->Insert, Check0, Fusion});
1686 DTUpdates.push_back({DT->Insert, Check1, Copy});
1687 DTUpdates.push_back({DT->Insert, Check1, Fusion});
1688 DT->applyUpdates(DTUpdates);
1696 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
1697 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
1702 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
1707 EltType->getPrimitiveSizeInBits().getFixedValue(),
1722 unsigned Op0Regs = (R + VF - 1) / VF * M;
1723 unsigned Op1Regs = (M + VF - 1) / VF * C;
1738 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
1744 BasicBlock *Start = InsertI->getParent();
1746 SplitBlock(InsertI->getParent(), InsertI, DT, LI, nullptr, "continue");
1751 FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
1754 Builder.SetInsertPoint(TI.KLoop.Header->getTerminator());
1759 Phi->addIncoming(ConstantAggregateZero::get(TileVecTy),
1760 TI.RowLoop.Header->getSingleSuccessor());
1767 Builder.SetInsertPoint(InnerBody->getTerminator());
1778 Builder.SetInsertPoint(TI.RowLoop.Latch->getTerminator());
1779 storeMatrix(TileResult, Store->getPointerOperand(), Store->getAlign(),
1780 Store->isVolatile(), {LShape.NumRows, RShape.NumColumns},
1784 ColumnPhis[I]->addIncoming(TileResult.getVector(I), TI.KLoop.Latch);
1789 // currently the cost-model is not up to the task.
1791 addStringMetadataToLoop(LI->getLoopFor(TI.KLoop.Header),
1799 "Tiling only supported for column-major matrixes at the moment!");
1803 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
1804 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
1809 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
1813 Value *CPtr = Store->getPointerOperand();
1821 const unsigned TileR = std::min(R - I, unsigned(TileSize));
1822 const unsigned TileC = std::min(C - J, unsigned(TileSize));
1826 const unsigned TileM = std::min(M - K, unsigned(TileSize));
1828 loadMatrix(APtr, LoadOp0->getAlign(), LoadOp0->isVolatile(),
1832 loadMatrix(BPtr, LoadOp1->getAlign(), LoadOp1->isVolatile(),
1838 storeMatrix(Res, CPtr, Store->getAlign(), Store->isVolatile(), {R, M},
1847 Store->eraseFromParent();
1848 MatMul->eraseFromParent();
1849 if (LoadOp0->hasNUses(0)) {
1851 LoadOp0->eraseFromParent();
1853 if (LoadOp1 != LoadOp0 && LoadOp1->hasNUses(0)) {
1855 LoadOp1->eraseFromParent();
1872 Value *A = MatMul->getArgOperand(0);
1873 Value *B = MatMul->getArgOperand(1);
1881 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
1882 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
1883 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
1909 if (Transpose->hasOneUse()) {
1920 if (!MatMul->hasOneUse() || MatrixLayout != MatrixLayoutTy::ColumnMajor)
1923 // Lower {ld, ld} -> matmul -> st chains. No need to call finalizeLowering
1927 auto *Store = dyn_cast<StoreInst>(*MatMul->user_begin());
1932 WorkList.insert(Store->getOperand(1));
1941 if (DT->dominates(CurrI, MatMul))
1943 if (CurrI->mayHaveSideEffects() || CurrI->mayReadFromMemory())
1946 WorkList.insert(CurrI->op_begin(), CurrI->op_end());
1950 return DT->dominates(A, B);
1953 I->moveBefore(MatMul);
1964 BasicBlock *StoreParent = Store->getParent();
1965 bool FusableOpsInSameBlock = LoadOp0->getParent() == StoreParent &&
1966 LoadOp1->getParent() == StoreParent;
1972 if (DT->dominates(End, LoadOp0) && DT->dominates(End, LoadOp1))
1974 if (DT->dominates(Store, End))
1978 if (FusableOpsInSameBlock && End->getParent() != StoreParent)
1986 if (AA->isNoAlias(Load0Loc, EndLoc) && AA->isNoAlias(Load1Loc, EndLoc))
1992 if (End->getParent() == StoreParent) {
1993 End->moveAfter(Store);
2012 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
2013 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
2014 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
2016 const MatrixTy &Lhs = getMatrix(MatMul->getArgOperand(0), LShape, Builder);
2017 const MatrixTy &Rhs = getMatrix(MatMul->getArgOperand(1), RShape, Builder);
2039 Value *InputVal = Inst->getArgOperand(0);
2040 VectorType *VectorTy = cast<VectorType>(InputVal->getType());
2041 ShapeInfo ArgShape(Inst->getArgOperand(1), Inst->getArgOperand(2));
2052 FixedVectorType::get(VectorTy->getElementType(), NewNumElts));
2054 for (auto J : enumerate(InputMatrix.vectors())) {
2079 LowerLoad(Inst, Ptr, Inst->getAlign(),
2080 Builder.getInt64(I->second.getStride()), Inst->isVolatile(),
2081 I->second);
2091 LowerStore(Inst, StoredVal, Ptr, Inst->getAlign(),
2092 Builder.getInt64(I->second.getStride()), Inst->isVolatile(),
2093 I->second);
2103 Value *Lhs = Inst->getOperand(0);
2104 Value *Rhs = Inst->getOperand(1);
2107 ShapeInfo &Shape = I->second;
2118 // Helper to perform binary op on vectors.
2120 switch (Inst->getOpcode()) {
2154 Value *Op = Inst->getOperand(0);
2157 ShapeInfo &Shape = I->second;
2164 // Helper to perform unary op on vectors.
2166 switch (Inst->getOpcode()) {
2208 /// Used to keep track of sub-expressions that get reused while linearizing
2209 /// the expression. Re-used sub-expressions are marked as (reused).
2247 else if (V->getType()->isPointerTy())
2262 SS << M->second.getNumRows();
2264 SS << M->second.getNumColumns();
2272 if (!CI->getCalledFunction())
2275 StringRef Name = CI->getCalledFunction()->getName();
2281 write(Intrinsic::getBaseName(II->getIntrinsicID())
2287 switch (II->getIntrinsicID()) {
2289 prettyPrintMatrixType(II->getOperand(0), SS);
2291 prettyPrintMatrixType(II->getOperand(1), SS);
2292 SS << "." << *II->getType()->getScalarType();
2295 prettyPrintMatrixType(II->getOperand(0), SS);
2296 SS << "." << *II->getType()->getScalarType();
2300 SS << "." << *II->getType()->getScalarType();
2303 prettyPrintMatrixType(II->getOperand(0), SS);
2304 SS << "." << *II->getOperand(0)->getType()->getScalarType();
2316 switch (II->getIntrinsicID()) {
2336 if (V->getType()->isPointerTy()) {
2344 if (!V->getName().empty()) {
2345 Stream << " %" << V->getName() << "";
2346 LineLength += V->getName().size() + 2;
2355 TmpStream << CI->getValue();
2371 /// Expressions that are re-used multiple times are prefixed with (reused)
2372 /// at the re-used root instruction.
2385 assert(SI != Shared.end() && SI->second.count(Leaf));
2387 for (Value *S : SI->second) {
2390 DebugLoc DL = cast<Instruction>(S)->getDebugLoc();
2394 ExprShared = SI->second.size() > 1;
2404 Ops.append(CI->arg_begin(), CI->arg_end() - getNumShapeArgs(CI));
2407 // non-matrix ops.
2411 Ops.append(I->value_op_begin(), I->value_op_end());
2412 write(std::string(I->getOpcodeName()));
2445 /// 1. Use the inlined-at debug information to group matrix operations to the
2448 /// RemarkGenerator::getExpressionLeaves) for each subprogram - expression
2453 /// bottom-up traversal of the matrix operands, starting at a leaf. Note
2454 /// that multiple leaves can share sub-expressions. Shared subexpressions
2474 if (Expr->getType()->isVoidTy() ||
2475 !any_of(Expr->users(), [&ExprsInSubprogram](User *U) {
2493 I.first->second.insert(Leaf);
2495 for (Value *Op : cast<Instruction>(V)->operand_values())
2518 if (I->second.size() == 1)
2519 Count = CM->second.getOpInfo();
2521 SharedCount = CM->second.getOpInfo();
2523 for (Value *Op : cast<Instruction>(Root)->operand_values()) {
2542 DILocation *Context = I->getDebugLoc();
2545 Subprog2Exprs.insert({getSubprogram(Context->getScope()), {}});
2546 I.first->second.push_back(KV.first);
2551 I.first->second.push_back(KV.first);
2566 DebugLoc Loc = cast<Instruction>(L)->getDebugLoc();
2567 DILocation *Context = cast<Instruction>(L)->getDebugLoc();
2569 if (getSubprogram(Context->getScope()) == KV.first) {
2581 OptimizationRemark Rem(DEBUG_TYPE, "matrix-lowered", Loc,
2582 cast<Instruction>(L)->getParent());
2650 static_cast<PassInfoMixin<LowerMatrixIntrinsicsPass> *>(this)->printPipeline(