Lines Matching +full:isa +full:- +full:base
1 //===- ARMParallelDSP.cpp - Parallel DSP Pass -----------------------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// Armv6 introduced instructions to perform 32-bit SIMD operations. The
12 /// DSP intrinsics, which map on these 32-bit SIMD operations.
15 //===----------------------------------------------------------------------===//
42 #define DEBUG_TYPE "arm-parallel-dsp"
47 DisableParallelDSP("disable-arm-parallel-dsp", cl::Hidden, cl::init(false),
51 NumLoadLimit("arm-parallel-dsp-load-limit", cl::Hidden, cl::init(16),
76 return isa<LoadInst>(LHS) && isa<LoadInst>(RHS);
84 /// Represent a sequence of multiply-accumulate operations with the aim to
104 auto GetMulOperand = [](Value *V) -> Instruction* {
106 if (auto *I = dyn_cast<Instruction>(SExt->getOperand(0)))
107 if (I->getOpcode() == Instruction::Mul)
110 if (I->getOpcode() == Instruction::Mul)
117 Value *LHS = cast<Instruction>(I->getOperand(0))->getOperand(0);
118 Value *RHS = cast<Instruction>(I->getOperand(1))->getOperand(0);
125 if (auto *Mul = GetMulOperand(Add->getOperand(0)))
127 if (auto *Mul = GetMulOperand(Add->getOperand(1)))
147 << *Mul0->Root << "\n"
148 << *Mul1->Root << "\n");
149 Mul0->Paired = true;
150 Mul1->Paired = true;
152 Mul1->Exchange = true;
159 bool is64Bit() const { return Root->getType()->isIntegerTy(64); }
161 Type *getType() const { return Root->getType(); }
179 Root->replaceAllUsesWith(SMLAD);
187 LLVM_DEBUG(dbgs() << *Mul->Root << "\n"
188 << " " << *Mul->LHS << "\n"
189 << " " << *Mul->RHS << "\n");
229 /// Try to match and generate: SMLAD, SMLADX - Signed Multiply Accumulate
230 /// Dual performs two signed 16x16-bit multiplications. It adds the
231 /// products to a 32-bit accumulate operand. Optionally, the instruction can
267 DL = &M->getDataLayout();
272 if (!ST->allowsUnalignedMem()) {
278 if (!ST->hasDSP()) {
284 if (!ST->isLittle()) {
291 LLVM_DEBUG(dbgs() << " - " << F.getName() << "\n\n");
308 dbgs() << "Ld0:"; Ld0->dump();
309 dbgs() << "Ld1:"; Ld1->dump();
326 if (SExt->getSrcTy()->getIntegerBitWidth() != MaxBitWidth)
329 if (auto *Ld = dyn_cast<LoadInst>(SExt->getOperand(0))) {
337 /// Iterate through the block and record base, offset pairs of loads which can
346 // record loads which are simple, sign-extended and have a single user.
347 // TODO: Allow zero-extended loads.
352 if (!Ld || !Ld->isSimple() ||
353 !Ld->hasOneUse() || !isa<SExtInst>(Ld->user_back()))
370 MemoryLocation(Read->getPointerOperand(), Size);
372 if (!isModOrRefSet(AA->getModRefInfo(Write, ReadLoc)))
374 if (Write->comesBefore(Read))
381 auto SafeToPair = [&](LoadInst *Base, LoadInst *Offset) {
382 bool BaseFirst = Base->comesBefore(Offset);
383 LoadInst *Dominator = BaseFirst ? Base : Offset;
384 LoadInst *Dominated = BaseFirst ? Offset : Base;
392 if (Dominator->comesBefore(Before))
399 // Record base, offset load pairs.
400 for (auto *Base : Loads) {
402 if (Base == Offset || OffsetLoads.count(Offset))
405 if (isConsecutiveAccess(Base, Offset, *DL, *SE) &&
406 SafeToPair(Base, Offset)) {
407 LoadPairs[Base] = Offset;
425 // form a multiply-accumulate chain. The search records the Add and Mul
429 // If we find a non-instruction, try to use it as the initial accumulator
436 if (I->getParent() != BB)
439 switch (I->getOpcode()) {
450 Value *LHS = I->getOperand(0);
451 Value *RHS = I->getOperand(1);
465 Value *MulOp0 = I->getOperand(0);
466 Value *MulOp1 = I->getOperand(1);
470 return Search(I->getOperand(0), BB, R);
475 // The pass needs to identify integer add/sub reductions of 16-bit vector
522 if (!Ty->isIntegerTy(32) && !Ty->isIntegerTy(64))
553 if (!MulCand->HasTwoLoadInputs())
562 auto Ld0 = static_cast<LoadInst*>(PMul0->LHS);
563 auto Ld1 = static_cast<LoadInst*>(PMul1->LHS);
564 auto Ld2 = static_cast<LoadInst*>(PMul0->RHS);
565 auto Ld3 = static_cast<LoadInst*>(PMul1->RHS);
571 if (AreSequentialLoads(Ld0, Ld1, PMul0->VecLd)) {
572 if (AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
576 } else if (AreSequentialLoads(Ld3, Ld2, PMul1->VecLd)) {
582 } else if (AreSequentialLoads(Ld1, Ld0, PMul0->VecLd) &&
583 AreSequentialLoads(Ld2, Ld3, PMul1->VecLd)) {
598 if (PMul0->Paired)
606 if (PMul1->Paired)
609 const Instruction *Mul0 = PMul0->Root;
610 const Instruction *Mul1 = PMul1->Root;
633 SMLAD = Acc->getType()->isIntegerTy(32) ?
637 SMLAD = Acc->getType()->isIntegerTy(32) ?
641 IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
650 assert((isa<Instruction>(A) || isa<Instruction>(B)) &&
654 if (!isa<Instruction>(A))
656 else if (!isa<Instruction>(B))
659 V = DT->dominates(cast<Instruction>(A), cast<Instruction>(B)) ? B : A;
668 IRBuilder<NoFolder> Builder(R.getRoot()->getParent());
671 if (MulCand->Paired)
674 Instruction *Mul = cast<Instruction>(MulCand->Root);
677 if (R.getType() != Mul->getType()) {
678 assert(R.is64Bit() && "expected 64-bit result");
680 Mul = cast<Instruction>(Builder.CreateSExt(Mul, R.getRoot()->getType()));
697 ConstantInt::get(IntegerType::get(M->getContext(), 64), 0) :
698 ConstantInt::get(IntegerType::get(M->getContext(), 32), 0);
699 } else if (Acc->getType() != R.getType()) {
706 const Instruction *A = PairA.first->Root;
707 const Instruction *B = PairB.first->Root;
708 return A->comesBefore(B);
711 IntegerType *Ty = IntegerType::get(M->getContext(), 32);
715 LoadInst *BaseLHS = LHSMul->getBaseLoad();
716 LoadInst *BaseRHS = RHSMul->getBaseLoad();
718 WideLoads[BaseLHS]->getLoad() : CreateWideLoad(LHSMul->VecLd, Ty);
720 WideLoads[BaseRHS]->getLoad() : CreateWideLoad(RHSMul->VecLd, Ty);
724 Acc = CreateSMLAD(WideLHS, WideRHS, Acc, RHSMul->Exchange, InsertAfter);
733 LoadInst *Base = Loads[0];
736 Instruction *BaseSExt = dyn_cast<SExtInst>(Base->user_back());
737 Instruction *OffsetSExt = dyn_cast<SExtInst>(Offset->user_back());
743 [&](Value *A, Value *B) -> void {
744 if (!isa<Instruction>(A) || !isa<Instruction>(B))
750 if (DT->dominates(Source, Sink) ||
751 Source->getParent() != Sink->getParent() ||
752 isa<PHINode>(Source) || isa<PHINode>(Sink))
755 Source->moveBefore(Sink);
756 for (auto &Op : Source->operands())
761 LoadInst *DomLoad = DT->dominates(Base, Offset) ? Base : Offset;
762 IRBuilder<NoFolder> IRB(DomLoad->getParent(),
768 Value *VecPtr = Base->getPointerOperand();
769 LoadInst *WideLoad = IRB.CreateAlignedLoad(LoadTy, VecPtr, Base->getAlign());
772 MoveBefore(Base->getPointerOperand(), VecPtr);
777 // TODO: Support big-endian as well.
778 Value *Bottom = IRB.CreateTrunc(WideLoad, Base->getType());
779 Value *NewBaseSExt = IRB.CreateSExt(Bottom, BaseSExt->getType());
780 BaseSExt->replaceAllUsesWith(NewBaseSExt);
782 IntegerType *OffsetTy = cast<IntegerType>(Offset->getType());
783 Value *ShiftVal = ConstantInt::get(LoadTy, OffsetTy->getBitWidth());
786 Value *NewOffsetSExt = IRB.CreateSExt(Trunc, OffsetSExt->getType());
787 OffsetSExt->replaceAllUsesWith(NewOffsetSExt);
789 LLVM_DEBUG(dbgs() << "From Base and Offset:\n"
790 << *Base << "\n" << *Offset << "\n"
798 WideLoads.emplace(std::make_pair(Base,
809 INITIALIZE_PASS_BEGIN(ARMParallelDSP, "arm-parallel-dsp",
811 INITIALIZE_PASS_END(ARMParallelDSP, "arm-parallel-dsp",