Lines Matching full:matmul
960 // If we have a TT matmul or a TT add, lift the transpose. We may be able
1355 /// Special case for MatMul lowering. Prevents scalar loads of row-major
1358 void lowerDotProduct(CallInst *MatMul,
1361 if (FusedInsts.contains(MatMul) ||
1364 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
1365 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
1370 Value *LHS = MatMul->getArgOperand(0);
1371 Value *RHS = MatMul->getArgOperand(1);
1482 FusedInsts.insert(MatMul);
1483 IRBuilder<> Builder(MatMul);
1486 // Matmul must be the only user of loads because we don't use LowerLoad
1520 LHS = MatMul->getArgOperand(0);
1537 // pack scalar back into a matrix and then replace matmul inst
1538 Result = Builder.CreateInsertElement(PoisonValue::get(MatMul->getType()),
1540 MatMul->replaceAllUsesWith(Result);
1541 FusedInsts.insert(MatMul);
1542 ToRemove.push_back(MatMul);
1637 CallInst *MatMul) {
1649 BasicBlock *Check0 = MatMul->getParent();
1658 SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
1661 SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
1664 SplitBlock(MatMul->getParent(), MatMul, (DomTreeUpdater *)nullptr, LI,
1670 IRBuilder<> Builder(MatMul);
1721 bool isFusionProfitable(CallInst *MatMul) {
1725 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
1726 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
1731 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
1765 void createTiledLoops(CallInst *MatMul, Value *LPtr, ShapeInfo LShape,
1767 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
1772 Instruction *InsertI = cast<Instruction>(MatMul);
1776 IRBuilder<> Builder(MatMul);
1780 FixedVectorType::get(MatMul->getType()->getScalarType(), TileSize);
1805 getFastMathFlags(MatMul));
1824 void emitSIMDTiling(CallInst *MatMul, LoadInst *LoadOp0, LoadInst *LoadOp1,
1829 if (!isFusionProfitable(MatMul))
1832 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
1833 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
1838 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
1840 Value *APtr = getNonAliasingPointer(LoadOp0, Store, MatMul);
1841 Value *BPtr = getNonAliasingPointer(LoadOp1, Store, MatMul);
1845 createTiledLoops(MatMul, APtr, LShape, BPtr, RShape, Store);
1865 getFastMathFlags(MatMul));
1875 FusedInsts.insert(MatMul);
1877 eraseFromParentAndRemoveFromShapeMap(MatMul);
1893 LowerMatrixMultiplyFused(CallInst *MatMul,
1901 Value *A = MatMul->getArgOperand(0);
1902 Value *B = MatMul->getArgOperand(1);
1909 IRBuilder<> Builder(MatMul);
1910 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
1911 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
1912 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
1935 getFastMathFlags(MatMul));
1937 FusedInsts.insert(MatMul);
1945 finalizeLowering(MatMul, Result, Builder);
1949 if (!MatMul->hasOneUse() || MatrixLayout != MatrixLayoutTy::ColumnMajor)
1952 // Lower {ld, ld} -> matmul -> st chains. No need to call finalizeLowering
1956 auto *Store = dyn_cast<StoreInst>(*MatMul->user_begin());
1958 // The store address must dominate the MatMul instruction, otherwise
1970 if (DT->dominates(CurrI, MatMul))
1982 I->moveBefore(MatMul->getIterator());
2033 emitSIMDTiling(MatMul, LoadOp0, LoadOp1, Store, FusedInsts);
2039 void LowerMultiply(CallInst *MatMul) {
2040 IRBuilder<> Builder(MatMul);
2041 auto *EltType = cast<VectorType>(MatMul->getType())->getElementType();
2042 ShapeInfo LShape(MatMul->getArgOperand(2), MatMul->getArgOperand(3));
2043 ShapeInfo RShape(MatMul->getArgOperand(3), MatMul->getArgOperand(4));
2045 const MatrixTy &Lhs = getMatrix(MatMul->getArgOperand(0), LShape, Builder);
2046 const MatrixTy &Rhs = getMatrix(MatMul->getArgOperand(1), RShape, Builder);
2060 getFastMathFlags(MatMul));
2061 finalizeLowering(MatMul, Result, Builder);