1bdd1243dSDimitry Andric //===- ComplexDeinterleavingPass.cpp --------------------------------------===// 2bdd1243dSDimitry Andric // 3bdd1243dSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4bdd1243dSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5bdd1243dSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6bdd1243dSDimitry Andric // 7bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 8bdd1243dSDimitry Andric // 9bdd1243dSDimitry Andric // Identification: 10bdd1243dSDimitry Andric // This step is responsible for finding the patterns that can be lowered to 11bdd1243dSDimitry Andric // complex instructions, and building a graph to represent the complex 12bdd1243dSDimitry Andric // structures. Starting from the "Converging Shuffle" (a shuffle that 13bdd1243dSDimitry Andric // reinterleaves the complex components, with a mask of <0, 2, 1, 3>), the 14bdd1243dSDimitry Andric // operands are evaluated and identified as "Composite Nodes" (collections of 15bdd1243dSDimitry Andric // instructions that can potentially be lowered to a single complex 16bdd1243dSDimitry Andric // instruction). This is performed by checking the real and imaginary components 17bdd1243dSDimitry Andric // and tracking the data flow for each component while following the operand 18bdd1243dSDimitry Andric // pairs. Validity of each node is expected to be done upon creation, and any 19bdd1243dSDimitry Andric // validation errors should halt traversal and prevent further graph 20bdd1243dSDimitry Andric // construction. 2106c3fb27SDimitry Andric // Instead of relying on Shuffle operations, vector interleaving and 2206c3fb27SDimitry Andric // deinterleaving can be represented by vector.interleave2 and 2306c3fb27SDimitry Andric // vector.deinterleave2 intrinsics. Scalable vectors can be represented only by 2406c3fb27SDimitry Andric // these intrinsics, whereas, fixed-width vectors are recognized for both 2506c3fb27SDimitry Andric // shufflevector instruction and intrinsics. 26bdd1243dSDimitry Andric // 27bdd1243dSDimitry Andric // Replacement: 28bdd1243dSDimitry Andric // This step traverses the graph built up by identification, delegating to the 29bdd1243dSDimitry Andric // target to validate and generate the correct intrinsics, and plumbs them 30bdd1243dSDimitry Andric // together connecting each end of the new intrinsics graph to the existing 31bdd1243dSDimitry Andric // use-def chain. This step is assumed to finish successfully, as all 32bdd1243dSDimitry Andric // information is expected to be correct by this point. 33bdd1243dSDimitry Andric // 34bdd1243dSDimitry Andric // 35bdd1243dSDimitry Andric // Internal data structure: 36bdd1243dSDimitry Andric // ComplexDeinterleavingGraph: 37bdd1243dSDimitry Andric // Keeps references to all the valid CompositeNodes formed as part of the 38bdd1243dSDimitry Andric // transformation, and every Instruction contained within said nodes. It also 39bdd1243dSDimitry Andric // holds onto a reference to the root Instruction, and the root node that should 40bdd1243dSDimitry Andric // replace it. 41bdd1243dSDimitry Andric // 42bdd1243dSDimitry Andric // ComplexDeinterleavingCompositeNode: 43bdd1243dSDimitry Andric // A CompositeNode represents a single transformation point; each node should 44bdd1243dSDimitry Andric // transform into a single complex instruction (ignoring vector splitting, which 45bdd1243dSDimitry Andric // would generate more instructions per node). They are identified in a 46bdd1243dSDimitry Andric // depth-first manner, traversing and identifying the operands of each 47bdd1243dSDimitry Andric // instruction in the order they appear in the IR. 48bdd1243dSDimitry Andric // Each node maintains a reference to its Real and Imaginary instructions, 49bdd1243dSDimitry Andric // as well as any additional instructions that make up the identified operation 50bdd1243dSDimitry Andric // (Internal instructions should only have uses within their containing node). 51bdd1243dSDimitry Andric // A Node also contains the rotation and operation type that it represents. 52bdd1243dSDimitry Andric // Operands contains pointers to other CompositeNodes, acting as the edges in 53bdd1243dSDimitry Andric // the graph. ReplacementValue is the transformed Value* that has been emitted 54bdd1243dSDimitry Andric // to the IR. 55bdd1243dSDimitry Andric // 56bdd1243dSDimitry Andric // Note: If the operation of a Node is Shuffle, only the Real, Imaginary, and 57bdd1243dSDimitry Andric // ReplacementValue fields of that Node are relevant, where the ReplacementValue 58bdd1243dSDimitry Andric // should be pre-populated. 59bdd1243dSDimitry Andric // 60bdd1243dSDimitry Andric //===----------------------------------------------------------------------===// 61bdd1243dSDimitry Andric 62bdd1243dSDimitry Andric #include "llvm/CodeGen/ComplexDeinterleavingPass.h" 63bdd1243dSDimitry Andric #include "llvm/ADT/Statistic.h" 64bdd1243dSDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 65bdd1243dSDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 66bdd1243dSDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 67bdd1243dSDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 68bdd1243dSDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 69bdd1243dSDimitry Andric #include "llvm/IR/IRBuilder.h" 7006c3fb27SDimitry Andric #include "llvm/IR/PatternMatch.h" 71bdd1243dSDimitry Andric #include "llvm/InitializePasses.h" 72bdd1243dSDimitry Andric #include "llvm/Target/TargetMachine.h" 73bdd1243dSDimitry Andric #include "llvm/Transforms/Utils/Local.h" 74bdd1243dSDimitry Andric #include <algorithm> 75bdd1243dSDimitry Andric 76bdd1243dSDimitry Andric using namespace llvm; 77bdd1243dSDimitry Andric using namespace PatternMatch; 78bdd1243dSDimitry Andric 79bdd1243dSDimitry Andric #define DEBUG_TYPE "complex-deinterleaving" 80bdd1243dSDimitry Andric 81bdd1243dSDimitry Andric STATISTIC(NumComplexTransformations, "Amount of complex patterns transformed"); 82bdd1243dSDimitry Andric 83bdd1243dSDimitry Andric static cl::opt<bool> ComplexDeinterleavingEnabled( 84bdd1243dSDimitry Andric "enable-complex-deinterleaving", 85bdd1243dSDimitry Andric cl::desc("Enable generation of complex instructions"), cl::init(true), 86bdd1243dSDimitry Andric cl::Hidden); 87bdd1243dSDimitry Andric 88bdd1243dSDimitry Andric /// Checks the given mask, and determines whether said mask is interleaving. 89bdd1243dSDimitry Andric /// 90bdd1243dSDimitry Andric /// To be interleaving, a mask must alternate between `i` and `i + (Length / 91bdd1243dSDimitry Andric /// 2)`, and must contain all numbers within the range of `[0..Length)` (e.g. a 92bdd1243dSDimitry Andric /// 4x vector interleaving mask would be <0, 2, 1, 3>). 93bdd1243dSDimitry Andric static bool isInterleavingMask(ArrayRef<int> Mask); 94bdd1243dSDimitry Andric 95bdd1243dSDimitry Andric /// Checks the given mask, and determines whether said mask is deinterleaving. 96bdd1243dSDimitry Andric /// 97bdd1243dSDimitry Andric /// To be deinterleaving, a mask must increment in steps of 2, and either start 98bdd1243dSDimitry Andric /// with 0 or 1. 99bdd1243dSDimitry Andric /// (e.g. an 8x vector deinterleaving mask would be either <0, 2, 4, 6> or 100bdd1243dSDimitry Andric /// <1, 3, 5, 7>). 101bdd1243dSDimitry Andric static bool isDeinterleavingMask(ArrayRef<int> Mask); 102bdd1243dSDimitry Andric 10306c3fb27SDimitry Andric /// Returns true if the operation is a negation of V, and it works for both 10406c3fb27SDimitry Andric /// integers and floats. 10506c3fb27SDimitry Andric static bool isNeg(Value *V); 10606c3fb27SDimitry Andric 10706c3fb27SDimitry Andric /// Returns the operand for negation operation. 10806c3fb27SDimitry Andric static Value *getNegOperand(Value *V); 10906c3fb27SDimitry Andric 110bdd1243dSDimitry Andric namespace { 111bdd1243dSDimitry Andric 112bdd1243dSDimitry Andric class ComplexDeinterleavingLegacyPass : public FunctionPass { 113bdd1243dSDimitry Andric public: 114bdd1243dSDimitry Andric static char ID; 115bdd1243dSDimitry Andric 116bdd1243dSDimitry Andric ComplexDeinterleavingLegacyPass(const TargetMachine *TM = nullptr) 117bdd1243dSDimitry Andric : FunctionPass(ID), TM(TM) { 118bdd1243dSDimitry Andric initializeComplexDeinterleavingLegacyPassPass( 119bdd1243dSDimitry Andric *PassRegistry::getPassRegistry()); 120bdd1243dSDimitry Andric } 121bdd1243dSDimitry Andric 122bdd1243dSDimitry Andric StringRef getPassName() const override { 123bdd1243dSDimitry Andric return "Complex Deinterleaving Pass"; 124bdd1243dSDimitry Andric } 125bdd1243dSDimitry Andric 126bdd1243dSDimitry Andric bool runOnFunction(Function &F) override; 127bdd1243dSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 128bdd1243dSDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>(); 129bdd1243dSDimitry Andric AU.setPreservesCFG(); 130bdd1243dSDimitry Andric } 131bdd1243dSDimitry Andric 132bdd1243dSDimitry Andric private: 133bdd1243dSDimitry Andric const TargetMachine *TM; 134bdd1243dSDimitry Andric }; 135bdd1243dSDimitry Andric 136bdd1243dSDimitry Andric class ComplexDeinterleavingGraph; 137bdd1243dSDimitry Andric struct ComplexDeinterleavingCompositeNode { 138bdd1243dSDimitry Andric 139bdd1243dSDimitry Andric ComplexDeinterleavingCompositeNode(ComplexDeinterleavingOperation Op, 14006c3fb27SDimitry Andric Value *R, Value *I) 141bdd1243dSDimitry Andric : Operation(Op), Real(R), Imag(I) {} 142bdd1243dSDimitry Andric 143bdd1243dSDimitry Andric private: 144bdd1243dSDimitry Andric friend class ComplexDeinterleavingGraph; 145bdd1243dSDimitry Andric using NodePtr = std::shared_ptr<ComplexDeinterleavingCompositeNode>; 146bdd1243dSDimitry Andric using RawNodePtr = ComplexDeinterleavingCompositeNode *; 147bdd1243dSDimitry Andric 148bdd1243dSDimitry Andric public: 149bdd1243dSDimitry Andric ComplexDeinterleavingOperation Operation; 15006c3fb27SDimitry Andric Value *Real; 15106c3fb27SDimitry Andric Value *Imag; 152bdd1243dSDimitry Andric 15306c3fb27SDimitry Andric // This two members are required exclusively for generating 15406c3fb27SDimitry Andric // ComplexDeinterleavingOperation::Symmetric operations. 15506c3fb27SDimitry Andric unsigned Opcode; 15606c3fb27SDimitry Andric std::optional<FastMathFlags> Flags; 15706c3fb27SDimitry Andric 15806c3fb27SDimitry Andric ComplexDeinterleavingRotation Rotation = 15906c3fb27SDimitry Andric ComplexDeinterleavingRotation::Rotation_0; 160bdd1243dSDimitry Andric SmallVector<RawNodePtr> Operands; 161bdd1243dSDimitry Andric Value *ReplacementNode = nullptr; 162bdd1243dSDimitry Andric 163bdd1243dSDimitry Andric void addOperand(NodePtr Node) { Operands.push_back(Node.get()); } 164bdd1243dSDimitry Andric 165bdd1243dSDimitry Andric void dump() { dump(dbgs()); } 166bdd1243dSDimitry Andric void dump(raw_ostream &OS) { 167bdd1243dSDimitry Andric auto PrintValue = [&](Value *V) { 168bdd1243dSDimitry Andric if (V) { 169bdd1243dSDimitry Andric OS << "\""; 170bdd1243dSDimitry Andric V->print(OS, true); 171bdd1243dSDimitry Andric OS << "\"\n"; 172bdd1243dSDimitry Andric } else 173bdd1243dSDimitry Andric OS << "nullptr\n"; 174bdd1243dSDimitry Andric }; 175bdd1243dSDimitry Andric auto PrintNodeRef = [&](RawNodePtr Ptr) { 176bdd1243dSDimitry Andric if (Ptr) 177bdd1243dSDimitry Andric OS << Ptr << "\n"; 178bdd1243dSDimitry Andric else 179bdd1243dSDimitry Andric OS << "nullptr\n"; 180bdd1243dSDimitry Andric }; 181bdd1243dSDimitry Andric 182bdd1243dSDimitry Andric OS << "- CompositeNode: " << this << "\n"; 183bdd1243dSDimitry Andric OS << " Real: "; 184bdd1243dSDimitry Andric PrintValue(Real); 185bdd1243dSDimitry Andric OS << " Imag: "; 186bdd1243dSDimitry Andric PrintValue(Imag); 187bdd1243dSDimitry Andric OS << " ReplacementNode: "; 188bdd1243dSDimitry Andric PrintValue(ReplacementNode); 189bdd1243dSDimitry Andric OS << " Operation: " << (int)Operation << "\n"; 190bdd1243dSDimitry Andric OS << " Rotation: " << ((int)Rotation * 90) << "\n"; 191bdd1243dSDimitry Andric OS << " Operands: \n"; 192bdd1243dSDimitry Andric for (const auto &Op : Operands) { 193bdd1243dSDimitry Andric OS << " - "; 194bdd1243dSDimitry Andric PrintNodeRef(Op); 195bdd1243dSDimitry Andric } 196bdd1243dSDimitry Andric } 197bdd1243dSDimitry Andric }; 198bdd1243dSDimitry Andric 199bdd1243dSDimitry Andric class ComplexDeinterleavingGraph { 200bdd1243dSDimitry Andric public: 20106c3fb27SDimitry Andric struct Product { 20206c3fb27SDimitry Andric Value *Multiplier; 20306c3fb27SDimitry Andric Value *Multiplicand; 20406c3fb27SDimitry Andric bool IsPositive; 20506c3fb27SDimitry Andric }; 20606c3fb27SDimitry Andric 20706c3fb27SDimitry Andric using Addend = std::pair<Value *, bool>; 208bdd1243dSDimitry Andric using NodePtr = ComplexDeinterleavingCompositeNode::NodePtr; 209bdd1243dSDimitry Andric using RawNodePtr = ComplexDeinterleavingCompositeNode::RawNodePtr; 21006c3fb27SDimitry Andric 21106c3fb27SDimitry Andric // Helper struct for holding info about potential partial multiplication 21206c3fb27SDimitry Andric // candidates 21306c3fb27SDimitry Andric struct PartialMulCandidate { 21406c3fb27SDimitry Andric Value *Common; 21506c3fb27SDimitry Andric NodePtr Node; 21606c3fb27SDimitry Andric unsigned RealIdx; 21706c3fb27SDimitry Andric unsigned ImagIdx; 21806c3fb27SDimitry Andric bool IsNodeInverted; 21906c3fb27SDimitry Andric }; 22006c3fb27SDimitry Andric 22106c3fb27SDimitry Andric explicit ComplexDeinterleavingGraph(const TargetLowering *TL, 22206c3fb27SDimitry Andric const TargetLibraryInfo *TLI) 22306c3fb27SDimitry Andric : TL(TL), TLI(TLI) {} 224bdd1243dSDimitry Andric 225bdd1243dSDimitry Andric private: 22606c3fb27SDimitry Andric const TargetLowering *TL = nullptr; 22706c3fb27SDimitry Andric const TargetLibraryInfo *TLI = nullptr; 228bdd1243dSDimitry Andric SmallVector<NodePtr> CompositeNodes; 229*8a4dda33SDimitry Andric DenseMap<std::pair<Value *, Value *>, NodePtr> CachedResult; 23006c3fb27SDimitry Andric 23106c3fb27SDimitry Andric SmallPtrSet<Instruction *, 16> FinalInstructions; 23206c3fb27SDimitry Andric 23306c3fb27SDimitry Andric /// Root instructions are instructions from which complex computation starts 23406c3fb27SDimitry Andric std::map<Instruction *, NodePtr> RootToNode; 23506c3fb27SDimitry Andric 23606c3fb27SDimitry Andric /// Topologically sorted root instructions 23706c3fb27SDimitry Andric SmallVector<Instruction *, 1> OrderedRoots; 23806c3fb27SDimitry Andric 23906c3fb27SDimitry Andric /// When examining a basic block for complex deinterleaving, if it is a simple 24006c3fb27SDimitry Andric /// one-block loop, then the only incoming block is 'Incoming' and the 24106c3fb27SDimitry Andric /// 'BackEdge' block is the block itself." 24206c3fb27SDimitry Andric BasicBlock *BackEdge = nullptr; 24306c3fb27SDimitry Andric BasicBlock *Incoming = nullptr; 24406c3fb27SDimitry Andric 24506c3fb27SDimitry Andric /// ReductionInfo maps from %ReductionOp to %PHInode and Instruction 24606c3fb27SDimitry Andric /// %OutsideUser as it is shown in the IR: 24706c3fb27SDimitry Andric /// 24806c3fb27SDimitry Andric /// vector.body: 24906c3fb27SDimitry Andric /// %PHInode = phi <vector type> [ zeroinitializer, %entry ], 25006c3fb27SDimitry Andric /// [ %ReductionOp, %vector.body ] 25106c3fb27SDimitry Andric /// ... 25206c3fb27SDimitry Andric /// %ReductionOp = fadd i64 ... 25306c3fb27SDimitry Andric /// ... 25406c3fb27SDimitry Andric /// br i1 %condition, label %vector.body, %middle.block 25506c3fb27SDimitry Andric /// 25606c3fb27SDimitry Andric /// middle.block: 25706c3fb27SDimitry Andric /// %OutsideUser = llvm.vector.reduce.fadd(..., %ReductionOp) 25806c3fb27SDimitry Andric /// 25906c3fb27SDimitry Andric /// %OutsideUser can be `llvm.vector.reduce.fadd` or `fadd` preceding 26006c3fb27SDimitry Andric /// `llvm.vector.reduce.fadd` when unroll factor isn't one. 26106c3fb27SDimitry Andric std::map<Instruction *, std::pair<PHINode *, Instruction *>> ReductionInfo; 26206c3fb27SDimitry Andric 26306c3fb27SDimitry Andric /// In the process of detecting a reduction, we consider a pair of 26406c3fb27SDimitry Andric /// %ReductionOP, which we refer to as real and imag (or vice versa), and 26506c3fb27SDimitry Andric /// traverse the use-tree to detect complex operations. As this is a reduction 26606c3fb27SDimitry Andric /// operation, it will eventually reach RealPHI and ImagPHI, which corresponds 26706c3fb27SDimitry Andric /// to the %ReductionOPs that we suspect to be complex. 26806c3fb27SDimitry Andric /// RealPHI and ImagPHI are used by the identifyPHINode method. 26906c3fb27SDimitry Andric PHINode *RealPHI = nullptr; 27006c3fb27SDimitry Andric PHINode *ImagPHI = nullptr; 27106c3fb27SDimitry Andric 27206c3fb27SDimitry Andric /// Set this flag to true if RealPHI and ImagPHI were reached during reduction 27306c3fb27SDimitry Andric /// detection. 27406c3fb27SDimitry Andric bool PHIsFound = false; 27506c3fb27SDimitry Andric 27606c3fb27SDimitry Andric /// OldToNewPHI maps the original real PHINode to a new, double-sized PHINode. 27706c3fb27SDimitry Andric /// The new PHINode corresponds to a vector of deinterleaved complex numbers. 27806c3fb27SDimitry Andric /// This mapping is populated during 27906c3fb27SDimitry Andric /// ComplexDeinterleavingOperation::ReductionPHI node replacement. It is then 28006c3fb27SDimitry Andric /// used in the ComplexDeinterleavingOperation::ReductionOperation node 28106c3fb27SDimitry Andric /// replacement process. 28206c3fb27SDimitry Andric std::map<PHINode *, PHINode *> OldToNewPHI; 283bdd1243dSDimitry Andric 284bdd1243dSDimitry Andric NodePtr prepareCompositeNode(ComplexDeinterleavingOperation Operation, 28506c3fb27SDimitry Andric Value *R, Value *I) { 28606c3fb27SDimitry Andric assert(((Operation != ComplexDeinterleavingOperation::ReductionPHI && 28706c3fb27SDimitry Andric Operation != ComplexDeinterleavingOperation::ReductionOperation) || 28806c3fb27SDimitry Andric (R && I)) && 28906c3fb27SDimitry Andric "Reduction related nodes must have Real and Imaginary parts"); 290bdd1243dSDimitry Andric return std::make_shared<ComplexDeinterleavingCompositeNode>(Operation, R, 291bdd1243dSDimitry Andric I); 292bdd1243dSDimitry Andric } 293bdd1243dSDimitry Andric 294bdd1243dSDimitry Andric NodePtr submitCompositeNode(NodePtr Node) { 295bdd1243dSDimitry Andric CompositeNodes.push_back(Node); 296*8a4dda33SDimitry Andric if (Node->Real && Node->Imag) 297*8a4dda33SDimitry Andric CachedResult[{Node->Real, Node->Imag}] = Node; 298bdd1243dSDimitry Andric return Node; 299bdd1243dSDimitry Andric } 300bdd1243dSDimitry Andric 301bdd1243dSDimitry Andric /// Identifies a complex partial multiply pattern and its rotation, based on 302bdd1243dSDimitry Andric /// the following patterns 303bdd1243dSDimitry Andric /// 304bdd1243dSDimitry Andric /// 0: r: cr + ar * br 305bdd1243dSDimitry Andric /// i: ci + ar * bi 306bdd1243dSDimitry Andric /// 90: r: cr - ai * bi 307bdd1243dSDimitry Andric /// i: ci + ai * br 308bdd1243dSDimitry Andric /// 180: r: cr - ar * br 309bdd1243dSDimitry Andric /// i: ci - ar * bi 310bdd1243dSDimitry Andric /// 270: r: cr + ai * bi 311bdd1243dSDimitry Andric /// i: ci - ai * br 312bdd1243dSDimitry Andric NodePtr identifyPartialMul(Instruction *Real, Instruction *Imag); 313bdd1243dSDimitry Andric 314bdd1243dSDimitry Andric /// Identify the other branch of a Partial Mul, taking the CommonOperandI that 315bdd1243dSDimitry Andric /// is partially known from identifyPartialMul, filling in the other half of 316bdd1243dSDimitry Andric /// the complex pair. 31706c3fb27SDimitry Andric NodePtr 31806c3fb27SDimitry Andric identifyNodeWithImplicitAdd(Instruction *I, Instruction *J, 31906c3fb27SDimitry Andric std::pair<Value *, Value *> &CommonOperandI); 320bdd1243dSDimitry Andric 321bdd1243dSDimitry Andric /// Identifies a complex add pattern and its rotation, based on the following 322bdd1243dSDimitry Andric /// patterns. 323bdd1243dSDimitry Andric /// 324bdd1243dSDimitry Andric /// 90: r: ar - bi 325bdd1243dSDimitry Andric /// i: ai + br 326bdd1243dSDimitry Andric /// 270: r: ar + bi 327bdd1243dSDimitry Andric /// i: ai - br 328bdd1243dSDimitry Andric NodePtr identifyAdd(Instruction *Real, Instruction *Imag); 32906c3fb27SDimitry Andric NodePtr identifySymmetricOperation(Instruction *Real, Instruction *Imag); 330bdd1243dSDimitry Andric 33106c3fb27SDimitry Andric NodePtr identifyNode(Value *R, Value *I); 332bdd1243dSDimitry Andric 33306c3fb27SDimitry Andric /// Determine if a sum of complex numbers can be formed from \p RealAddends 33406c3fb27SDimitry Andric /// and \p ImagAddens. If \p Accumulator is not null, add the result to it. 33506c3fb27SDimitry Andric /// Return nullptr if it is not possible to construct a complex number. 33606c3fb27SDimitry Andric /// \p Flags are needed to generate symmetric Add and Sub operations. 33706c3fb27SDimitry Andric NodePtr identifyAdditions(std::list<Addend> &RealAddends, 33806c3fb27SDimitry Andric std::list<Addend> &ImagAddends, 33906c3fb27SDimitry Andric std::optional<FastMathFlags> Flags, 34006c3fb27SDimitry Andric NodePtr Accumulator); 34106c3fb27SDimitry Andric 34206c3fb27SDimitry Andric /// Extract one addend that have both real and imaginary parts positive. 34306c3fb27SDimitry Andric NodePtr extractPositiveAddend(std::list<Addend> &RealAddends, 34406c3fb27SDimitry Andric std::list<Addend> &ImagAddends); 34506c3fb27SDimitry Andric 34606c3fb27SDimitry Andric /// Determine if sum of multiplications of complex numbers can be formed from 34706c3fb27SDimitry Andric /// \p RealMuls and \p ImagMuls. If \p Accumulator is not null, add the result 34806c3fb27SDimitry Andric /// to it. Return nullptr if it is not possible to construct a complex number. 34906c3fb27SDimitry Andric NodePtr identifyMultiplications(std::vector<Product> &RealMuls, 35006c3fb27SDimitry Andric std::vector<Product> &ImagMuls, 35106c3fb27SDimitry Andric NodePtr Accumulator); 35206c3fb27SDimitry Andric 35306c3fb27SDimitry Andric /// Go through pairs of multiplication (one Real and one Imag) and find all 35406c3fb27SDimitry Andric /// possible candidates for partial multiplication and put them into \p 35506c3fb27SDimitry Andric /// Candidates. Returns true if all Product has pair with common operand 35606c3fb27SDimitry Andric bool collectPartialMuls(const std::vector<Product> &RealMuls, 35706c3fb27SDimitry Andric const std::vector<Product> &ImagMuls, 35806c3fb27SDimitry Andric std::vector<PartialMulCandidate> &Candidates); 35906c3fb27SDimitry Andric 36006c3fb27SDimitry Andric /// If the code is compiled with -Ofast or expressions have `reassoc` flag, 36106c3fb27SDimitry Andric /// the order of complex computation operations may be significantly altered, 36206c3fb27SDimitry Andric /// and the real and imaginary parts may not be executed in parallel. This 36306c3fb27SDimitry Andric /// function takes this into consideration and employs a more general approach 36406c3fb27SDimitry Andric /// to identify complex computations. Initially, it gathers all the addends 36506c3fb27SDimitry Andric /// and multiplicands and then constructs a complex expression from them. 36606c3fb27SDimitry Andric NodePtr identifyReassocNodes(Instruction *I, Instruction *J); 36706c3fb27SDimitry Andric 36806c3fb27SDimitry Andric NodePtr identifyRoot(Instruction *I); 36906c3fb27SDimitry Andric 37006c3fb27SDimitry Andric /// Identifies the Deinterleave operation applied to a vector containing 37106c3fb27SDimitry Andric /// complex numbers. There are two ways to represent the Deinterleave 37206c3fb27SDimitry Andric /// operation: 37306c3fb27SDimitry Andric /// * Using two shufflevectors with even indices for /pReal instruction and 37406c3fb27SDimitry Andric /// odd indices for /pImag instructions (only for fixed-width vectors) 37506c3fb27SDimitry Andric /// * Using two extractvalue instructions applied to `vector.deinterleave2` 37606c3fb27SDimitry Andric /// intrinsic (for both fixed and scalable vectors) 37706c3fb27SDimitry Andric NodePtr identifyDeinterleave(Instruction *Real, Instruction *Imag); 37806c3fb27SDimitry Andric 37906c3fb27SDimitry Andric /// identifying the operation that represents a complex number repeated in a 38006c3fb27SDimitry Andric /// Splat vector. There are two possible types of splats: ConstantExpr with 38106c3fb27SDimitry Andric /// the opcode ShuffleVector and ShuffleVectorInstr. Both should have an 38206c3fb27SDimitry Andric /// initialization mask with all values set to zero. 38306c3fb27SDimitry Andric NodePtr identifySplat(Value *Real, Value *Imag); 38406c3fb27SDimitry Andric 38506c3fb27SDimitry Andric NodePtr identifyPHINode(Instruction *Real, Instruction *Imag); 38606c3fb27SDimitry Andric 38706c3fb27SDimitry Andric /// Identifies SelectInsts in a loop that has reduction with predication masks 38806c3fb27SDimitry Andric /// and/or predicated tail folding 38906c3fb27SDimitry Andric NodePtr identifySelectNode(Instruction *Real, Instruction *Imag); 39006c3fb27SDimitry Andric 39106c3fb27SDimitry Andric Value *replaceNode(IRBuilderBase &Builder, RawNodePtr Node); 39206c3fb27SDimitry Andric 39306c3fb27SDimitry Andric /// Complete IR modifications after producing new reduction operation: 39406c3fb27SDimitry Andric /// * Populate the PHINode generated for 39506c3fb27SDimitry Andric /// ComplexDeinterleavingOperation::ReductionPHI 39606c3fb27SDimitry Andric /// * Deinterleave the final value outside of the loop and repurpose original 39706c3fb27SDimitry Andric /// reduction users 39806c3fb27SDimitry Andric void processReductionOperation(Value *OperationReplacement, RawNodePtr Node); 399bdd1243dSDimitry Andric 400bdd1243dSDimitry Andric public: 401bdd1243dSDimitry Andric void dump() { dump(dbgs()); } 402bdd1243dSDimitry Andric void dump(raw_ostream &OS) { 403bdd1243dSDimitry Andric for (const auto &Node : CompositeNodes) 404bdd1243dSDimitry Andric Node->dump(OS); 405bdd1243dSDimitry Andric } 406bdd1243dSDimitry Andric 407bdd1243dSDimitry Andric /// Returns false if the deinterleaving operation should be cancelled for the 408bdd1243dSDimitry Andric /// current graph. 409bdd1243dSDimitry Andric bool identifyNodes(Instruction *RootI); 410bdd1243dSDimitry Andric 41106c3fb27SDimitry Andric /// In case \pB is one-block loop, this function seeks potential reductions 41206c3fb27SDimitry Andric /// and populates ReductionInfo. Returns true if any reductions were 41306c3fb27SDimitry Andric /// identified. 41406c3fb27SDimitry Andric bool collectPotentialReductions(BasicBlock *B); 41506c3fb27SDimitry Andric 41606c3fb27SDimitry Andric void identifyReductionNodes(); 41706c3fb27SDimitry Andric 41806c3fb27SDimitry Andric /// Check that every instruction, from the roots to the leaves, has internal 41906c3fb27SDimitry Andric /// uses. 42006c3fb27SDimitry Andric bool checkNodes(); 42106c3fb27SDimitry Andric 422bdd1243dSDimitry Andric /// Perform the actual replacement of the underlying instruction graph. 423bdd1243dSDimitry Andric void replaceNodes(); 424bdd1243dSDimitry Andric }; 425bdd1243dSDimitry Andric 426bdd1243dSDimitry Andric class ComplexDeinterleaving { 427bdd1243dSDimitry Andric public: 428bdd1243dSDimitry Andric ComplexDeinterleaving(const TargetLowering *tl, const TargetLibraryInfo *tli) 429bdd1243dSDimitry Andric : TL(tl), TLI(tli) {} 430bdd1243dSDimitry Andric bool runOnFunction(Function &F); 431bdd1243dSDimitry Andric 432bdd1243dSDimitry Andric private: 433bdd1243dSDimitry Andric bool evaluateBasicBlock(BasicBlock *B); 434bdd1243dSDimitry Andric 435bdd1243dSDimitry Andric const TargetLowering *TL = nullptr; 436bdd1243dSDimitry Andric const TargetLibraryInfo *TLI = nullptr; 437bdd1243dSDimitry Andric }; 438bdd1243dSDimitry Andric 439bdd1243dSDimitry Andric } // namespace 440bdd1243dSDimitry Andric 441bdd1243dSDimitry Andric char ComplexDeinterleavingLegacyPass::ID = 0; 442bdd1243dSDimitry Andric 443bdd1243dSDimitry Andric INITIALIZE_PASS_BEGIN(ComplexDeinterleavingLegacyPass, DEBUG_TYPE, 444bdd1243dSDimitry Andric "Complex Deinterleaving", false, false) 445bdd1243dSDimitry Andric INITIALIZE_PASS_END(ComplexDeinterleavingLegacyPass, DEBUG_TYPE, 446bdd1243dSDimitry Andric "Complex Deinterleaving", false, false) 447bdd1243dSDimitry Andric 448bdd1243dSDimitry Andric PreservedAnalyses ComplexDeinterleavingPass::run(Function &F, 449bdd1243dSDimitry Andric FunctionAnalysisManager &AM) { 450bdd1243dSDimitry Andric const TargetLowering *TL = TM->getSubtargetImpl(F)->getTargetLowering(); 451bdd1243dSDimitry Andric auto &TLI = AM.getResult<llvm::TargetLibraryAnalysis>(F); 452bdd1243dSDimitry Andric if (!ComplexDeinterleaving(TL, &TLI).runOnFunction(F)) 453bdd1243dSDimitry Andric return PreservedAnalyses::all(); 454bdd1243dSDimitry Andric 455bdd1243dSDimitry Andric PreservedAnalyses PA; 456bdd1243dSDimitry Andric PA.preserve<FunctionAnalysisManagerModuleProxy>(); 457bdd1243dSDimitry Andric return PA; 458bdd1243dSDimitry Andric } 459bdd1243dSDimitry Andric 460bdd1243dSDimitry Andric FunctionPass *llvm::createComplexDeinterleavingPass(const TargetMachine *TM) { 461bdd1243dSDimitry Andric return new ComplexDeinterleavingLegacyPass(TM); 462bdd1243dSDimitry Andric } 463bdd1243dSDimitry Andric 464bdd1243dSDimitry Andric bool ComplexDeinterleavingLegacyPass::runOnFunction(Function &F) { 465bdd1243dSDimitry Andric const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering(); 466bdd1243dSDimitry Andric auto TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); 467bdd1243dSDimitry Andric return ComplexDeinterleaving(TL, &TLI).runOnFunction(F); 468bdd1243dSDimitry Andric } 469bdd1243dSDimitry Andric 470bdd1243dSDimitry Andric bool ComplexDeinterleaving::runOnFunction(Function &F) { 471bdd1243dSDimitry Andric if (!ComplexDeinterleavingEnabled) { 472bdd1243dSDimitry Andric LLVM_DEBUG( 473bdd1243dSDimitry Andric dbgs() << "Complex deinterleaving has been explicitly disabled.\n"); 474bdd1243dSDimitry Andric return false; 475bdd1243dSDimitry Andric } 476bdd1243dSDimitry Andric 477bdd1243dSDimitry Andric if (!TL->isComplexDeinterleavingSupported()) { 478bdd1243dSDimitry Andric LLVM_DEBUG( 479bdd1243dSDimitry Andric dbgs() << "Complex deinterleaving has been disabled, target does " 480bdd1243dSDimitry Andric "not support lowering of complex number operations.\n"); 481bdd1243dSDimitry Andric return false; 482bdd1243dSDimitry Andric } 483bdd1243dSDimitry Andric 484bdd1243dSDimitry Andric bool Changed = false; 485bdd1243dSDimitry Andric for (auto &B : F) 486bdd1243dSDimitry Andric Changed |= evaluateBasicBlock(&B); 487bdd1243dSDimitry Andric 488bdd1243dSDimitry Andric return Changed; 489bdd1243dSDimitry Andric } 490bdd1243dSDimitry Andric 491bdd1243dSDimitry Andric static bool isInterleavingMask(ArrayRef<int> Mask) { 492bdd1243dSDimitry Andric // If the size is not even, it's not an interleaving mask 493bdd1243dSDimitry Andric if ((Mask.size() & 1)) 494bdd1243dSDimitry Andric return false; 495bdd1243dSDimitry Andric 496bdd1243dSDimitry Andric int HalfNumElements = Mask.size() / 2; 497bdd1243dSDimitry Andric for (int Idx = 0; Idx < HalfNumElements; ++Idx) { 498bdd1243dSDimitry Andric int MaskIdx = Idx * 2; 499bdd1243dSDimitry Andric if (Mask[MaskIdx] != Idx || Mask[MaskIdx + 1] != (Idx + HalfNumElements)) 500bdd1243dSDimitry Andric return false; 501bdd1243dSDimitry Andric } 502bdd1243dSDimitry Andric 503bdd1243dSDimitry Andric return true; 504bdd1243dSDimitry Andric } 505bdd1243dSDimitry Andric 506bdd1243dSDimitry Andric static bool isDeinterleavingMask(ArrayRef<int> Mask) { 507bdd1243dSDimitry Andric int Offset = Mask[0]; 508bdd1243dSDimitry Andric int HalfNumElements = Mask.size() / 2; 509bdd1243dSDimitry Andric 510bdd1243dSDimitry Andric for (int Idx = 1; Idx < HalfNumElements; ++Idx) { 511bdd1243dSDimitry Andric if (Mask[Idx] != (Idx * 2) + Offset) 512bdd1243dSDimitry Andric return false; 513bdd1243dSDimitry Andric } 514bdd1243dSDimitry Andric 515bdd1243dSDimitry Andric return true; 516bdd1243dSDimitry Andric } 517bdd1243dSDimitry Andric 51806c3fb27SDimitry Andric bool isNeg(Value *V) { 51906c3fb27SDimitry Andric return match(V, m_FNeg(m_Value())) || match(V, m_Neg(m_Value())); 52006c3fb27SDimitry Andric } 52106c3fb27SDimitry Andric 52206c3fb27SDimitry Andric Value *getNegOperand(Value *V) { 52306c3fb27SDimitry Andric assert(isNeg(V)); 52406c3fb27SDimitry Andric auto *I = cast<Instruction>(V); 52506c3fb27SDimitry Andric if (I->getOpcode() == Instruction::FNeg) 52606c3fb27SDimitry Andric return I->getOperand(0); 52706c3fb27SDimitry Andric 52806c3fb27SDimitry Andric return I->getOperand(1); 52906c3fb27SDimitry Andric } 53006c3fb27SDimitry Andric 531bdd1243dSDimitry Andric bool ComplexDeinterleaving::evaluateBasicBlock(BasicBlock *B) { 53206c3fb27SDimitry Andric ComplexDeinterleavingGraph Graph(TL, TLI); 53306c3fb27SDimitry Andric if (Graph.collectPotentialReductions(B)) 53406c3fb27SDimitry Andric Graph.identifyReductionNodes(); 535bdd1243dSDimitry Andric 53606c3fb27SDimitry Andric for (auto &I : *B) 53706c3fb27SDimitry Andric Graph.identifyNodes(&I); 538bdd1243dSDimitry Andric 53906c3fb27SDimitry Andric if (Graph.checkNodes()) { 540bdd1243dSDimitry Andric Graph.replaceNodes(); 54106c3fb27SDimitry Andric return true; 542bdd1243dSDimitry Andric } 543bdd1243dSDimitry Andric 54406c3fb27SDimitry Andric return false; 545bdd1243dSDimitry Andric } 546bdd1243dSDimitry Andric 547bdd1243dSDimitry Andric ComplexDeinterleavingGraph::NodePtr 548bdd1243dSDimitry Andric ComplexDeinterleavingGraph::identifyNodeWithImplicitAdd( 549bdd1243dSDimitry Andric Instruction *Real, Instruction *Imag, 55006c3fb27SDimitry Andric std::pair<Value *, Value *> &PartialMatch) { 551bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "identifyNodeWithImplicitAdd " << *Real << " / " << *Imag 552bdd1243dSDimitry Andric << "\n"); 553bdd1243dSDimitry Andric 554bdd1243dSDimitry Andric if (!Real->hasOneUse() || !Imag->hasOneUse()) { 555bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Mul operand has multiple uses.\n"); 556bdd1243dSDimitry Andric return nullptr; 557bdd1243dSDimitry Andric } 558bdd1243dSDimitry Andric 55906c3fb27SDimitry Andric if ((Real->getOpcode() != Instruction::FMul && 56006c3fb27SDimitry Andric Real->getOpcode() != Instruction::Mul) || 56106c3fb27SDimitry Andric (Imag->getOpcode() != Instruction::FMul && 56206c3fb27SDimitry Andric Imag->getOpcode() != Instruction::Mul)) { 56306c3fb27SDimitry Andric LLVM_DEBUG( 56406c3fb27SDimitry Andric dbgs() << " - Real or imaginary instruction is not fmul or mul\n"); 565bdd1243dSDimitry Andric return nullptr; 566bdd1243dSDimitry Andric } 567bdd1243dSDimitry Andric 56806c3fb27SDimitry Andric Value *R0 = Real->getOperand(0); 56906c3fb27SDimitry Andric Value *R1 = Real->getOperand(1); 57006c3fb27SDimitry Andric Value *I0 = Imag->getOperand(0); 57106c3fb27SDimitry Andric Value *I1 = Imag->getOperand(1); 572bdd1243dSDimitry Andric 573bdd1243dSDimitry Andric // A +/+ has a rotation of 0. If any of the operands are fneg, we flip the 574bdd1243dSDimitry Andric // rotations and use the operand. 575bdd1243dSDimitry Andric unsigned Negs = 0; 57606c3fb27SDimitry Andric Value *Op; 57706c3fb27SDimitry Andric if (match(R0, m_Neg(m_Value(Op)))) { 578bdd1243dSDimitry Andric Negs |= 1; 57906c3fb27SDimitry Andric R0 = Op; 58006c3fb27SDimitry Andric } else if (match(R1, m_Neg(m_Value(Op)))) { 58106c3fb27SDimitry Andric Negs |= 1; 58206c3fb27SDimitry Andric R1 = Op; 583bdd1243dSDimitry Andric } 58406c3fb27SDimitry Andric 58506c3fb27SDimitry Andric if (isNeg(I0)) { 586bdd1243dSDimitry Andric Negs |= 2; 587bdd1243dSDimitry Andric Negs ^= 1; 58806c3fb27SDimitry Andric I0 = Op; 58906c3fb27SDimitry Andric } else if (match(I1, m_Neg(m_Value(Op)))) { 59006c3fb27SDimitry Andric Negs |= 2; 59106c3fb27SDimitry Andric Negs ^= 1; 59206c3fb27SDimitry Andric I1 = Op; 593bdd1243dSDimitry Andric } 594bdd1243dSDimitry Andric 595bdd1243dSDimitry Andric ComplexDeinterleavingRotation Rotation = (ComplexDeinterleavingRotation)Negs; 596bdd1243dSDimitry Andric 59706c3fb27SDimitry Andric Value *CommonOperand; 59806c3fb27SDimitry Andric Value *UncommonRealOp; 59906c3fb27SDimitry Andric Value *UncommonImagOp; 600bdd1243dSDimitry Andric 601bdd1243dSDimitry Andric if (R0 == I0 || R0 == I1) { 602bdd1243dSDimitry Andric CommonOperand = R0; 603bdd1243dSDimitry Andric UncommonRealOp = R1; 604bdd1243dSDimitry Andric } else if (R1 == I0 || R1 == I1) { 605bdd1243dSDimitry Andric CommonOperand = R1; 606bdd1243dSDimitry Andric UncommonRealOp = R0; 607bdd1243dSDimitry Andric } else { 608bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - No equal operand\n"); 609bdd1243dSDimitry Andric return nullptr; 610bdd1243dSDimitry Andric } 611bdd1243dSDimitry Andric 612bdd1243dSDimitry Andric UncommonImagOp = (CommonOperand == I0) ? I1 : I0; 613bdd1243dSDimitry Andric if (Rotation == ComplexDeinterleavingRotation::Rotation_90 || 614bdd1243dSDimitry Andric Rotation == ComplexDeinterleavingRotation::Rotation_270) 615bdd1243dSDimitry Andric std::swap(UncommonRealOp, UncommonImagOp); 616bdd1243dSDimitry Andric 617bdd1243dSDimitry Andric // Between identifyPartialMul and here we need to have found a complete valid 618bdd1243dSDimitry Andric // pair from the CommonOperand of each part. 619bdd1243dSDimitry Andric if (Rotation == ComplexDeinterleavingRotation::Rotation_0 || 620bdd1243dSDimitry Andric Rotation == ComplexDeinterleavingRotation::Rotation_180) 621bdd1243dSDimitry Andric PartialMatch.first = CommonOperand; 622bdd1243dSDimitry Andric else 623bdd1243dSDimitry Andric PartialMatch.second = CommonOperand; 624bdd1243dSDimitry Andric 625bdd1243dSDimitry Andric if (!PartialMatch.first || !PartialMatch.second) { 626bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Incomplete partial match\n"); 627bdd1243dSDimitry Andric return nullptr; 628bdd1243dSDimitry Andric } 629bdd1243dSDimitry Andric 630bdd1243dSDimitry Andric NodePtr CommonNode = identifyNode(PartialMatch.first, PartialMatch.second); 631bdd1243dSDimitry Andric if (!CommonNode) { 632bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - No CommonNode identified\n"); 633bdd1243dSDimitry Andric return nullptr; 634bdd1243dSDimitry Andric } 635bdd1243dSDimitry Andric 636bdd1243dSDimitry Andric NodePtr UncommonNode = identifyNode(UncommonRealOp, UncommonImagOp); 637bdd1243dSDimitry Andric if (!UncommonNode) { 638bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - No UncommonNode identified\n"); 639bdd1243dSDimitry Andric return nullptr; 640bdd1243dSDimitry Andric } 641bdd1243dSDimitry Andric 642bdd1243dSDimitry Andric NodePtr Node = prepareCompositeNode( 643bdd1243dSDimitry Andric ComplexDeinterleavingOperation::CMulPartial, Real, Imag); 644bdd1243dSDimitry Andric Node->Rotation = Rotation; 645bdd1243dSDimitry Andric Node->addOperand(CommonNode); 646bdd1243dSDimitry Andric Node->addOperand(UncommonNode); 647bdd1243dSDimitry Andric return submitCompositeNode(Node); 648bdd1243dSDimitry Andric } 649bdd1243dSDimitry Andric 650bdd1243dSDimitry Andric ComplexDeinterleavingGraph::NodePtr 651bdd1243dSDimitry Andric ComplexDeinterleavingGraph::identifyPartialMul(Instruction *Real, 652bdd1243dSDimitry Andric Instruction *Imag) { 653bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "identifyPartialMul " << *Real << " / " << *Imag 654bdd1243dSDimitry Andric << "\n"); 655bdd1243dSDimitry Andric // Determine rotation 65606c3fb27SDimitry Andric auto IsAdd = [](unsigned Op) { 65706c3fb27SDimitry Andric return Op == Instruction::FAdd || Op == Instruction::Add; 65806c3fb27SDimitry Andric }; 65906c3fb27SDimitry Andric auto IsSub = [](unsigned Op) { 66006c3fb27SDimitry Andric return Op == Instruction::FSub || Op == Instruction::Sub; 66106c3fb27SDimitry Andric }; 662bdd1243dSDimitry Andric ComplexDeinterleavingRotation Rotation; 66306c3fb27SDimitry Andric if (IsAdd(Real->getOpcode()) && IsAdd(Imag->getOpcode())) 664bdd1243dSDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_0; 66506c3fb27SDimitry Andric else if (IsSub(Real->getOpcode()) && IsAdd(Imag->getOpcode())) 666bdd1243dSDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_90; 66706c3fb27SDimitry Andric else if (IsSub(Real->getOpcode()) && IsSub(Imag->getOpcode())) 668bdd1243dSDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_180; 66906c3fb27SDimitry Andric else if (IsAdd(Real->getOpcode()) && IsSub(Imag->getOpcode())) 670bdd1243dSDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_270; 671bdd1243dSDimitry Andric else { 672bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Unhandled rotation.\n"); 673bdd1243dSDimitry Andric return nullptr; 674bdd1243dSDimitry Andric } 675bdd1243dSDimitry Andric 67606c3fb27SDimitry Andric if (isa<FPMathOperator>(Real) && 67706c3fb27SDimitry Andric (!Real->getFastMathFlags().allowContract() || 67806c3fb27SDimitry Andric !Imag->getFastMathFlags().allowContract())) { 679bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Contract is missing from the FastMath flags.\n"); 680bdd1243dSDimitry Andric return nullptr; 681bdd1243dSDimitry Andric } 682bdd1243dSDimitry Andric 683bdd1243dSDimitry Andric Value *CR = Real->getOperand(0); 684bdd1243dSDimitry Andric Instruction *RealMulI = dyn_cast<Instruction>(Real->getOperand(1)); 685bdd1243dSDimitry Andric if (!RealMulI) 686bdd1243dSDimitry Andric return nullptr; 687bdd1243dSDimitry Andric Value *CI = Imag->getOperand(0); 688bdd1243dSDimitry Andric Instruction *ImagMulI = dyn_cast<Instruction>(Imag->getOperand(1)); 689bdd1243dSDimitry Andric if (!ImagMulI) 690bdd1243dSDimitry Andric return nullptr; 691bdd1243dSDimitry Andric 692bdd1243dSDimitry Andric if (!RealMulI->hasOneUse() || !ImagMulI->hasOneUse()) { 693bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Mul instruction has multiple uses\n"); 694bdd1243dSDimitry Andric return nullptr; 695bdd1243dSDimitry Andric } 696bdd1243dSDimitry Andric 69706c3fb27SDimitry Andric Value *R0 = RealMulI->getOperand(0); 69806c3fb27SDimitry Andric Value *R1 = RealMulI->getOperand(1); 69906c3fb27SDimitry Andric Value *I0 = ImagMulI->getOperand(0); 70006c3fb27SDimitry Andric Value *I1 = ImagMulI->getOperand(1); 701bdd1243dSDimitry Andric 70206c3fb27SDimitry Andric Value *CommonOperand; 70306c3fb27SDimitry Andric Value *UncommonRealOp; 70406c3fb27SDimitry Andric Value *UncommonImagOp; 705bdd1243dSDimitry Andric 706bdd1243dSDimitry Andric if (R0 == I0 || R0 == I1) { 707bdd1243dSDimitry Andric CommonOperand = R0; 708bdd1243dSDimitry Andric UncommonRealOp = R1; 709bdd1243dSDimitry Andric } else if (R1 == I0 || R1 == I1) { 710bdd1243dSDimitry Andric CommonOperand = R1; 711bdd1243dSDimitry Andric UncommonRealOp = R0; 712bdd1243dSDimitry Andric } else { 713bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - No equal operand\n"); 714bdd1243dSDimitry Andric return nullptr; 715bdd1243dSDimitry Andric } 716bdd1243dSDimitry Andric 717bdd1243dSDimitry Andric UncommonImagOp = (CommonOperand == I0) ? I1 : I0; 718bdd1243dSDimitry Andric if (Rotation == ComplexDeinterleavingRotation::Rotation_90 || 719bdd1243dSDimitry Andric Rotation == ComplexDeinterleavingRotation::Rotation_270) 720bdd1243dSDimitry Andric std::swap(UncommonRealOp, UncommonImagOp); 721bdd1243dSDimitry Andric 72206c3fb27SDimitry Andric std::pair<Value *, Value *> PartialMatch( 723bdd1243dSDimitry Andric (Rotation == ComplexDeinterleavingRotation::Rotation_0 || 724bdd1243dSDimitry Andric Rotation == ComplexDeinterleavingRotation::Rotation_180) 725bdd1243dSDimitry Andric ? CommonOperand 726bdd1243dSDimitry Andric : nullptr, 727bdd1243dSDimitry Andric (Rotation == ComplexDeinterleavingRotation::Rotation_90 || 728bdd1243dSDimitry Andric Rotation == ComplexDeinterleavingRotation::Rotation_270) 729bdd1243dSDimitry Andric ? CommonOperand 730bdd1243dSDimitry Andric : nullptr); 73106c3fb27SDimitry Andric 73206c3fb27SDimitry Andric auto *CRInst = dyn_cast<Instruction>(CR); 73306c3fb27SDimitry Andric auto *CIInst = dyn_cast<Instruction>(CI); 73406c3fb27SDimitry Andric 73506c3fb27SDimitry Andric if (!CRInst || !CIInst) { 73606c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " - Common operands are not instructions.\n"); 73706c3fb27SDimitry Andric return nullptr; 73806c3fb27SDimitry Andric } 73906c3fb27SDimitry Andric 74006c3fb27SDimitry Andric NodePtr CNode = identifyNodeWithImplicitAdd(CRInst, CIInst, PartialMatch); 741bdd1243dSDimitry Andric if (!CNode) { 742bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - No cnode identified\n"); 743bdd1243dSDimitry Andric return nullptr; 744bdd1243dSDimitry Andric } 745bdd1243dSDimitry Andric 746bdd1243dSDimitry Andric NodePtr UncommonRes = identifyNode(UncommonRealOp, UncommonImagOp); 747bdd1243dSDimitry Andric if (!UncommonRes) { 748bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - No UncommonRes identified\n"); 749bdd1243dSDimitry Andric return nullptr; 750bdd1243dSDimitry Andric } 751bdd1243dSDimitry Andric 752bdd1243dSDimitry Andric assert(PartialMatch.first && PartialMatch.second); 753bdd1243dSDimitry Andric NodePtr CommonRes = identifyNode(PartialMatch.first, PartialMatch.second); 754bdd1243dSDimitry Andric if (!CommonRes) { 755bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - No CommonRes identified\n"); 756bdd1243dSDimitry Andric return nullptr; 757bdd1243dSDimitry Andric } 758bdd1243dSDimitry Andric 759bdd1243dSDimitry Andric NodePtr Node = prepareCompositeNode( 760bdd1243dSDimitry Andric ComplexDeinterleavingOperation::CMulPartial, Real, Imag); 761bdd1243dSDimitry Andric Node->Rotation = Rotation; 762bdd1243dSDimitry Andric Node->addOperand(CommonRes); 763bdd1243dSDimitry Andric Node->addOperand(UncommonRes); 764bdd1243dSDimitry Andric Node->addOperand(CNode); 765bdd1243dSDimitry Andric return submitCompositeNode(Node); 766bdd1243dSDimitry Andric } 767bdd1243dSDimitry Andric 768bdd1243dSDimitry Andric ComplexDeinterleavingGraph::NodePtr 769bdd1243dSDimitry Andric ComplexDeinterleavingGraph::identifyAdd(Instruction *Real, Instruction *Imag) { 770bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "identifyAdd " << *Real << " / " << *Imag << "\n"); 771bdd1243dSDimitry Andric 772bdd1243dSDimitry Andric // Determine rotation 773bdd1243dSDimitry Andric ComplexDeinterleavingRotation Rotation; 774bdd1243dSDimitry Andric if ((Real->getOpcode() == Instruction::FSub && 775bdd1243dSDimitry Andric Imag->getOpcode() == Instruction::FAdd) || 776bdd1243dSDimitry Andric (Real->getOpcode() == Instruction::Sub && 777bdd1243dSDimitry Andric Imag->getOpcode() == Instruction::Add)) 778bdd1243dSDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_90; 779bdd1243dSDimitry Andric else if ((Real->getOpcode() == Instruction::FAdd && 780bdd1243dSDimitry Andric Imag->getOpcode() == Instruction::FSub) || 781bdd1243dSDimitry Andric (Real->getOpcode() == Instruction::Add && 782bdd1243dSDimitry Andric Imag->getOpcode() == Instruction::Sub)) 783bdd1243dSDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_270; 784bdd1243dSDimitry Andric else { 785bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Unhandled case, rotation is not assigned.\n"); 786bdd1243dSDimitry Andric return nullptr; 787bdd1243dSDimitry Andric } 788bdd1243dSDimitry Andric 789bdd1243dSDimitry Andric auto *AR = dyn_cast<Instruction>(Real->getOperand(0)); 790bdd1243dSDimitry Andric auto *BI = dyn_cast<Instruction>(Real->getOperand(1)); 791bdd1243dSDimitry Andric auto *AI = dyn_cast<Instruction>(Imag->getOperand(0)); 792bdd1243dSDimitry Andric auto *BR = dyn_cast<Instruction>(Imag->getOperand(1)); 793bdd1243dSDimitry Andric 794bdd1243dSDimitry Andric if (!AR || !AI || !BR || !BI) { 795bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Not all operands are instructions.\n"); 796bdd1243dSDimitry Andric return nullptr; 797bdd1243dSDimitry Andric } 798bdd1243dSDimitry Andric 799bdd1243dSDimitry Andric NodePtr ResA = identifyNode(AR, AI); 800bdd1243dSDimitry Andric if (!ResA) { 801bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - AR/AI is not identified as a composite node.\n"); 802bdd1243dSDimitry Andric return nullptr; 803bdd1243dSDimitry Andric } 804bdd1243dSDimitry Andric NodePtr ResB = identifyNode(BR, BI); 805bdd1243dSDimitry Andric if (!ResB) { 806bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - BR/BI is not identified as a composite node.\n"); 807bdd1243dSDimitry Andric return nullptr; 808bdd1243dSDimitry Andric } 809bdd1243dSDimitry Andric 810bdd1243dSDimitry Andric NodePtr Node = 811bdd1243dSDimitry Andric prepareCompositeNode(ComplexDeinterleavingOperation::CAdd, Real, Imag); 812bdd1243dSDimitry Andric Node->Rotation = Rotation; 813bdd1243dSDimitry Andric Node->addOperand(ResA); 814bdd1243dSDimitry Andric Node->addOperand(ResB); 815bdd1243dSDimitry Andric return submitCompositeNode(Node); 816bdd1243dSDimitry Andric } 817bdd1243dSDimitry Andric 818bdd1243dSDimitry Andric static bool isInstructionPairAdd(Instruction *A, Instruction *B) { 819bdd1243dSDimitry Andric unsigned OpcA = A->getOpcode(); 820bdd1243dSDimitry Andric unsigned OpcB = B->getOpcode(); 821bdd1243dSDimitry Andric 822bdd1243dSDimitry Andric return (OpcA == Instruction::FSub && OpcB == Instruction::FAdd) || 823bdd1243dSDimitry Andric (OpcA == Instruction::FAdd && OpcB == Instruction::FSub) || 824bdd1243dSDimitry Andric (OpcA == Instruction::Sub && OpcB == Instruction::Add) || 825bdd1243dSDimitry Andric (OpcA == Instruction::Add && OpcB == Instruction::Sub); 826bdd1243dSDimitry Andric } 827bdd1243dSDimitry Andric 828bdd1243dSDimitry Andric static bool isInstructionPairMul(Instruction *A, Instruction *B) { 829bdd1243dSDimitry Andric auto Pattern = 830bdd1243dSDimitry Andric m_BinOp(m_FMul(m_Value(), m_Value()), m_FMul(m_Value(), m_Value())); 831bdd1243dSDimitry Andric 832bdd1243dSDimitry Andric return match(A, Pattern) && match(B, Pattern); 833bdd1243dSDimitry Andric } 834bdd1243dSDimitry Andric 83506c3fb27SDimitry Andric static bool isInstructionPotentiallySymmetric(Instruction *I) { 83606c3fb27SDimitry Andric switch (I->getOpcode()) { 83706c3fb27SDimitry Andric case Instruction::FAdd: 83806c3fb27SDimitry Andric case Instruction::FSub: 83906c3fb27SDimitry Andric case Instruction::FMul: 84006c3fb27SDimitry Andric case Instruction::FNeg: 84106c3fb27SDimitry Andric case Instruction::Add: 84206c3fb27SDimitry Andric case Instruction::Sub: 84306c3fb27SDimitry Andric case Instruction::Mul: 84406c3fb27SDimitry Andric return true; 84506c3fb27SDimitry Andric default: 84606c3fb27SDimitry Andric return false; 84706c3fb27SDimitry Andric } 84806c3fb27SDimitry Andric } 84906c3fb27SDimitry Andric 850bdd1243dSDimitry Andric ComplexDeinterleavingGraph::NodePtr 85106c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifySymmetricOperation(Instruction *Real, 85206c3fb27SDimitry Andric Instruction *Imag) { 85306c3fb27SDimitry Andric if (Real->getOpcode() != Imag->getOpcode()) 85406c3fb27SDimitry Andric return nullptr; 85506c3fb27SDimitry Andric 85606c3fb27SDimitry Andric if (!isInstructionPotentiallySymmetric(Real) || 85706c3fb27SDimitry Andric !isInstructionPotentiallySymmetric(Imag)) 85806c3fb27SDimitry Andric return nullptr; 85906c3fb27SDimitry Andric 86006c3fb27SDimitry Andric auto *R0 = Real->getOperand(0); 86106c3fb27SDimitry Andric auto *I0 = Imag->getOperand(0); 86206c3fb27SDimitry Andric 86306c3fb27SDimitry Andric NodePtr Op0 = identifyNode(R0, I0); 86406c3fb27SDimitry Andric NodePtr Op1 = nullptr; 86506c3fb27SDimitry Andric if (Op0 == nullptr) 86606c3fb27SDimitry Andric return nullptr; 86706c3fb27SDimitry Andric 86806c3fb27SDimitry Andric if (Real->isBinaryOp()) { 86906c3fb27SDimitry Andric auto *R1 = Real->getOperand(1); 87006c3fb27SDimitry Andric auto *I1 = Imag->getOperand(1); 87106c3fb27SDimitry Andric Op1 = identifyNode(R1, I1); 87206c3fb27SDimitry Andric if (Op1 == nullptr) 87306c3fb27SDimitry Andric return nullptr; 87406c3fb27SDimitry Andric } 87506c3fb27SDimitry Andric 87606c3fb27SDimitry Andric if (isa<FPMathOperator>(Real) && 87706c3fb27SDimitry Andric Real->getFastMathFlags() != Imag->getFastMathFlags()) 87806c3fb27SDimitry Andric return nullptr; 87906c3fb27SDimitry Andric 88006c3fb27SDimitry Andric auto Node = prepareCompositeNode(ComplexDeinterleavingOperation::Symmetric, 88106c3fb27SDimitry Andric Real, Imag); 88206c3fb27SDimitry Andric Node->Opcode = Real->getOpcode(); 88306c3fb27SDimitry Andric if (isa<FPMathOperator>(Real)) 88406c3fb27SDimitry Andric Node->Flags = Real->getFastMathFlags(); 88506c3fb27SDimitry Andric 88606c3fb27SDimitry Andric Node->addOperand(Op0); 88706c3fb27SDimitry Andric if (Real->isBinaryOp()) 88806c3fb27SDimitry Andric Node->addOperand(Op1); 88906c3fb27SDimitry Andric 89006c3fb27SDimitry Andric return submitCompositeNode(Node); 89106c3fb27SDimitry Andric } 89206c3fb27SDimitry Andric 89306c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 89406c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifyNode(Value *R, Value *I) { 89506c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "identifyNode on " << *R << " / " << *I << "\n"); 89606c3fb27SDimitry Andric assert(R->getType() == I->getType() && 89706c3fb27SDimitry Andric "Real and imaginary parts should not have different types"); 898*8a4dda33SDimitry Andric 899*8a4dda33SDimitry Andric auto It = CachedResult.find({R, I}); 900*8a4dda33SDimitry Andric if (It != CachedResult.end()) { 901bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Folding to existing node\n"); 902*8a4dda33SDimitry Andric return It->second; 903bdd1243dSDimitry Andric } 904bdd1243dSDimitry Andric 90506c3fb27SDimitry Andric if (NodePtr CN = identifySplat(R, I)) 90606c3fb27SDimitry Andric return CN; 90706c3fb27SDimitry Andric 90806c3fb27SDimitry Andric auto *Real = dyn_cast<Instruction>(R); 90906c3fb27SDimitry Andric auto *Imag = dyn_cast<Instruction>(I); 91006c3fb27SDimitry Andric if (!Real || !Imag) 91106c3fb27SDimitry Andric return nullptr; 91206c3fb27SDimitry Andric 91306c3fb27SDimitry Andric if (NodePtr CN = identifyDeinterleave(Real, Imag)) 91406c3fb27SDimitry Andric return CN; 91506c3fb27SDimitry Andric 91606c3fb27SDimitry Andric if (NodePtr CN = identifyPHINode(Real, Imag)) 91706c3fb27SDimitry Andric return CN; 91806c3fb27SDimitry Andric 91906c3fb27SDimitry Andric if (NodePtr CN = identifySelectNode(Real, Imag)) 92006c3fb27SDimitry Andric return CN; 92106c3fb27SDimitry Andric 92206c3fb27SDimitry Andric auto *VTy = cast<VectorType>(Real->getType()); 92306c3fb27SDimitry Andric auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy); 92406c3fb27SDimitry Andric 92506c3fb27SDimitry Andric bool HasCMulSupport = TL->isComplexDeinterleavingOperationSupported( 92606c3fb27SDimitry Andric ComplexDeinterleavingOperation::CMulPartial, NewVTy); 92706c3fb27SDimitry Andric bool HasCAddSupport = TL->isComplexDeinterleavingOperationSupported( 92806c3fb27SDimitry Andric ComplexDeinterleavingOperation::CAdd, NewVTy); 92906c3fb27SDimitry Andric 93006c3fb27SDimitry Andric if (HasCMulSupport && isInstructionPairMul(Real, Imag)) { 93106c3fb27SDimitry Andric if (NodePtr CN = identifyPartialMul(Real, Imag)) 93206c3fb27SDimitry Andric return CN; 93306c3fb27SDimitry Andric } 93406c3fb27SDimitry Andric 93506c3fb27SDimitry Andric if (HasCAddSupport && isInstructionPairAdd(Real, Imag)) { 93606c3fb27SDimitry Andric if (NodePtr CN = identifyAdd(Real, Imag)) 93706c3fb27SDimitry Andric return CN; 93806c3fb27SDimitry Andric } 93906c3fb27SDimitry Andric 94006c3fb27SDimitry Andric if (HasCMulSupport && HasCAddSupport) { 94106c3fb27SDimitry Andric if (NodePtr CN = identifyReassocNodes(Real, Imag)) 94206c3fb27SDimitry Andric return CN; 94306c3fb27SDimitry Andric } 94406c3fb27SDimitry Andric 94506c3fb27SDimitry Andric if (NodePtr CN = identifySymmetricOperation(Real, Imag)) 94606c3fb27SDimitry Andric return CN; 94706c3fb27SDimitry Andric 94806c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " - Not recognised as a valid pattern.\n"); 949*8a4dda33SDimitry Andric CachedResult[{R, I}] = nullptr; 95006c3fb27SDimitry Andric return nullptr; 95106c3fb27SDimitry Andric } 95206c3fb27SDimitry Andric 95306c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 95406c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifyReassocNodes(Instruction *Real, 95506c3fb27SDimitry Andric Instruction *Imag) { 95606c3fb27SDimitry Andric auto IsOperationSupported = [](unsigned Opcode) -> bool { 95706c3fb27SDimitry Andric return Opcode == Instruction::FAdd || Opcode == Instruction::FSub || 95806c3fb27SDimitry Andric Opcode == Instruction::FNeg || Opcode == Instruction::Add || 95906c3fb27SDimitry Andric Opcode == Instruction::Sub; 96006c3fb27SDimitry Andric }; 96106c3fb27SDimitry Andric 96206c3fb27SDimitry Andric if (!IsOperationSupported(Real->getOpcode()) || 96306c3fb27SDimitry Andric !IsOperationSupported(Imag->getOpcode())) 96406c3fb27SDimitry Andric return nullptr; 96506c3fb27SDimitry Andric 96606c3fb27SDimitry Andric std::optional<FastMathFlags> Flags; 96706c3fb27SDimitry Andric if (isa<FPMathOperator>(Real)) { 96806c3fb27SDimitry Andric if (Real->getFastMathFlags() != Imag->getFastMathFlags()) { 96906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "The flags in Real and Imaginary instructions are " 97006c3fb27SDimitry Andric "not identical\n"); 97106c3fb27SDimitry Andric return nullptr; 97206c3fb27SDimitry Andric } 97306c3fb27SDimitry Andric 97406c3fb27SDimitry Andric Flags = Real->getFastMathFlags(); 97506c3fb27SDimitry Andric if (!Flags->allowReassoc()) { 97606c3fb27SDimitry Andric LLVM_DEBUG( 97706c3fb27SDimitry Andric dbgs() 97806c3fb27SDimitry Andric << "the 'Reassoc' attribute is missing in the FastMath flags\n"); 97906c3fb27SDimitry Andric return nullptr; 98006c3fb27SDimitry Andric } 98106c3fb27SDimitry Andric } 98206c3fb27SDimitry Andric 98306c3fb27SDimitry Andric // Collect multiplications and addend instructions from the given instruction 98406c3fb27SDimitry Andric // while traversing it operands. Additionally, verify that all instructions 98506c3fb27SDimitry Andric // have the same fast math flags. 98606c3fb27SDimitry Andric auto Collect = [&Flags](Instruction *Insn, std::vector<Product> &Muls, 98706c3fb27SDimitry Andric std::list<Addend> &Addends) -> bool { 98806c3fb27SDimitry Andric SmallVector<PointerIntPair<Value *, 1, bool>> Worklist = {{Insn, true}}; 98906c3fb27SDimitry Andric SmallPtrSet<Value *, 8> Visited; 99006c3fb27SDimitry Andric while (!Worklist.empty()) { 99106c3fb27SDimitry Andric auto [V, IsPositive] = Worklist.back(); 99206c3fb27SDimitry Andric Worklist.pop_back(); 99306c3fb27SDimitry Andric if (!Visited.insert(V).second) 99406c3fb27SDimitry Andric continue; 99506c3fb27SDimitry Andric 99606c3fb27SDimitry Andric Instruction *I = dyn_cast<Instruction>(V); 99706c3fb27SDimitry Andric if (!I) { 99806c3fb27SDimitry Andric Addends.emplace_back(V, IsPositive); 99906c3fb27SDimitry Andric continue; 100006c3fb27SDimitry Andric } 100106c3fb27SDimitry Andric 100206c3fb27SDimitry Andric // If an instruction has more than one user, it indicates that it either 100306c3fb27SDimitry Andric // has an external user, which will be later checked by the checkNodes 100406c3fb27SDimitry Andric // function, or it is a subexpression utilized by multiple expressions. In 100506c3fb27SDimitry Andric // the latter case, we will attempt to separately identify the complex 100606c3fb27SDimitry Andric // operation from here in order to create a shared 100706c3fb27SDimitry Andric // ComplexDeinterleavingCompositeNode. 100806c3fb27SDimitry Andric if (I != Insn && I->getNumUses() > 1) { 100906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Found potential sub-expression: " << *I << "\n"); 101006c3fb27SDimitry Andric Addends.emplace_back(I, IsPositive); 101106c3fb27SDimitry Andric continue; 101206c3fb27SDimitry Andric } 101306c3fb27SDimitry Andric switch (I->getOpcode()) { 101406c3fb27SDimitry Andric case Instruction::FAdd: 101506c3fb27SDimitry Andric case Instruction::Add: 101606c3fb27SDimitry Andric Worklist.emplace_back(I->getOperand(1), IsPositive); 101706c3fb27SDimitry Andric Worklist.emplace_back(I->getOperand(0), IsPositive); 101806c3fb27SDimitry Andric break; 101906c3fb27SDimitry Andric case Instruction::FSub: 102006c3fb27SDimitry Andric Worklist.emplace_back(I->getOperand(1), !IsPositive); 102106c3fb27SDimitry Andric Worklist.emplace_back(I->getOperand(0), IsPositive); 102206c3fb27SDimitry Andric break; 102306c3fb27SDimitry Andric case Instruction::Sub: 102406c3fb27SDimitry Andric if (isNeg(I)) { 102506c3fb27SDimitry Andric Worklist.emplace_back(getNegOperand(I), !IsPositive); 102606c3fb27SDimitry Andric } else { 102706c3fb27SDimitry Andric Worklist.emplace_back(I->getOperand(1), !IsPositive); 102806c3fb27SDimitry Andric Worklist.emplace_back(I->getOperand(0), IsPositive); 102906c3fb27SDimitry Andric } 103006c3fb27SDimitry Andric break; 103106c3fb27SDimitry Andric case Instruction::FMul: 103206c3fb27SDimitry Andric case Instruction::Mul: { 103306c3fb27SDimitry Andric Value *A, *B; 103406c3fb27SDimitry Andric if (isNeg(I->getOperand(0))) { 103506c3fb27SDimitry Andric A = getNegOperand(I->getOperand(0)); 103606c3fb27SDimitry Andric IsPositive = !IsPositive; 103706c3fb27SDimitry Andric } else { 103806c3fb27SDimitry Andric A = I->getOperand(0); 103906c3fb27SDimitry Andric } 104006c3fb27SDimitry Andric 104106c3fb27SDimitry Andric if (isNeg(I->getOperand(1))) { 104206c3fb27SDimitry Andric B = getNegOperand(I->getOperand(1)); 104306c3fb27SDimitry Andric IsPositive = !IsPositive; 104406c3fb27SDimitry Andric } else { 104506c3fb27SDimitry Andric B = I->getOperand(1); 104606c3fb27SDimitry Andric } 104706c3fb27SDimitry Andric Muls.push_back(Product{A, B, IsPositive}); 104806c3fb27SDimitry Andric break; 104906c3fb27SDimitry Andric } 105006c3fb27SDimitry Andric case Instruction::FNeg: 105106c3fb27SDimitry Andric Worklist.emplace_back(I->getOperand(0), !IsPositive); 105206c3fb27SDimitry Andric break; 105306c3fb27SDimitry Andric default: 105406c3fb27SDimitry Andric Addends.emplace_back(I, IsPositive); 105506c3fb27SDimitry Andric continue; 105606c3fb27SDimitry Andric } 105706c3fb27SDimitry Andric 105806c3fb27SDimitry Andric if (Flags && I->getFastMathFlags() != *Flags) { 105906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "The instruction's fast math flags are " 106006c3fb27SDimitry Andric "inconsistent with the root instructions' flags: " 106106c3fb27SDimitry Andric << *I << "\n"); 106206c3fb27SDimitry Andric return false; 106306c3fb27SDimitry Andric } 106406c3fb27SDimitry Andric } 106506c3fb27SDimitry Andric return true; 106606c3fb27SDimitry Andric }; 106706c3fb27SDimitry Andric 106806c3fb27SDimitry Andric std::vector<Product> RealMuls, ImagMuls; 106906c3fb27SDimitry Andric std::list<Addend> RealAddends, ImagAddends; 107006c3fb27SDimitry Andric if (!Collect(Real, RealMuls, RealAddends) || 107106c3fb27SDimitry Andric !Collect(Imag, ImagMuls, ImagAddends)) 107206c3fb27SDimitry Andric return nullptr; 107306c3fb27SDimitry Andric 107406c3fb27SDimitry Andric if (RealAddends.size() != ImagAddends.size()) 107506c3fb27SDimitry Andric return nullptr; 107606c3fb27SDimitry Andric 107706c3fb27SDimitry Andric NodePtr FinalNode; 107806c3fb27SDimitry Andric if (!RealMuls.empty() || !ImagMuls.empty()) { 107906c3fb27SDimitry Andric // If there are multiplicands, extract positive addend and use it as an 108006c3fb27SDimitry Andric // accumulator 108106c3fb27SDimitry Andric FinalNode = extractPositiveAddend(RealAddends, ImagAddends); 108206c3fb27SDimitry Andric FinalNode = identifyMultiplications(RealMuls, ImagMuls, FinalNode); 108306c3fb27SDimitry Andric if (!FinalNode) 108406c3fb27SDimitry Andric return nullptr; 108506c3fb27SDimitry Andric } 108606c3fb27SDimitry Andric 108706c3fb27SDimitry Andric // Identify and process remaining additions 108806c3fb27SDimitry Andric if (!RealAddends.empty() || !ImagAddends.empty()) { 108906c3fb27SDimitry Andric FinalNode = identifyAdditions(RealAddends, ImagAddends, Flags, FinalNode); 109006c3fb27SDimitry Andric if (!FinalNode) 109106c3fb27SDimitry Andric return nullptr; 109206c3fb27SDimitry Andric } 109306c3fb27SDimitry Andric assert(FinalNode && "FinalNode can not be nullptr here"); 109406c3fb27SDimitry Andric // Set the Real and Imag fields of the final node and submit it 109506c3fb27SDimitry Andric FinalNode->Real = Real; 109606c3fb27SDimitry Andric FinalNode->Imag = Imag; 109706c3fb27SDimitry Andric submitCompositeNode(FinalNode); 109806c3fb27SDimitry Andric return FinalNode; 109906c3fb27SDimitry Andric } 110006c3fb27SDimitry Andric 110106c3fb27SDimitry Andric bool ComplexDeinterleavingGraph::collectPartialMuls( 110206c3fb27SDimitry Andric const std::vector<Product> &RealMuls, const std::vector<Product> &ImagMuls, 110306c3fb27SDimitry Andric std::vector<PartialMulCandidate> &PartialMulCandidates) { 110406c3fb27SDimitry Andric // Helper function to extract a common operand from two products 110506c3fb27SDimitry Andric auto FindCommonInstruction = [](const Product &Real, 110606c3fb27SDimitry Andric const Product &Imag) -> Value * { 110706c3fb27SDimitry Andric if (Real.Multiplicand == Imag.Multiplicand || 110806c3fb27SDimitry Andric Real.Multiplicand == Imag.Multiplier) 110906c3fb27SDimitry Andric return Real.Multiplicand; 111006c3fb27SDimitry Andric 111106c3fb27SDimitry Andric if (Real.Multiplier == Imag.Multiplicand || 111206c3fb27SDimitry Andric Real.Multiplier == Imag.Multiplier) 111306c3fb27SDimitry Andric return Real.Multiplier; 111406c3fb27SDimitry Andric 111506c3fb27SDimitry Andric return nullptr; 111606c3fb27SDimitry Andric }; 111706c3fb27SDimitry Andric 111806c3fb27SDimitry Andric // Iterating over real and imaginary multiplications to find common operands 111906c3fb27SDimitry Andric // If a common operand is found, a partial multiplication candidate is created 112006c3fb27SDimitry Andric // and added to the candidates vector The function returns false if no common 112106c3fb27SDimitry Andric // operands are found for any product 112206c3fb27SDimitry Andric for (unsigned i = 0; i < RealMuls.size(); ++i) { 112306c3fb27SDimitry Andric bool FoundCommon = false; 112406c3fb27SDimitry Andric for (unsigned j = 0; j < ImagMuls.size(); ++j) { 112506c3fb27SDimitry Andric auto *Common = FindCommonInstruction(RealMuls[i], ImagMuls[j]); 112606c3fb27SDimitry Andric if (!Common) 112706c3fb27SDimitry Andric continue; 112806c3fb27SDimitry Andric 112906c3fb27SDimitry Andric auto *A = RealMuls[i].Multiplicand == Common ? RealMuls[i].Multiplier 113006c3fb27SDimitry Andric : RealMuls[i].Multiplicand; 113106c3fb27SDimitry Andric auto *B = ImagMuls[j].Multiplicand == Common ? ImagMuls[j].Multiplier 113206c3fb27SDimitry Andric : ImagMuls[j].Multiplicand; 113306c3fb27SDimitry Andric 113406c3fb27SDimitry Andric auto Node = identifyNode(A, B); 113506c3fb27SDimitry Andric if (Node) { 113606c3fb27SDimitry Andric FoundCommon = true; 113706c3fb27SDimitry Andric PartialMulCandidates.push_back({Common, Node, i, j, false}); 113806c3fb27SDimitry Andric } 113906c3fb27SDimitry Andric 114006c3fb27SDimitry Andric Node = identifyNode(B, A); 114106c3fb27SDimitry Andric if (Node) { 114206c3fb27SDimitry Andric FoundCommon = true; 114306c3fb27SDimitry Andric PartialMulCandidates.push_back({Common, Node, i, j, true}); 114406c3fb27SDimitry Andric } 114506c3fb27SDimitry Andric } 114606c3fb27SDimitry Andric if (!FoundCommon) 114706c3fb27SDimitry Andric return false; 114806c3fb27SDimitry Andric } 114906c3fb27SDimitry Andric return true; 115006c3fb27SDimitry Andric } 115106c3fb27SDimitry Andric 115206c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 115306c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifyMultiplications( 115406c3fb27SDimitry Andric std::vector<Product> &RealMuls, std::vector<Product> &ImagMuls, 115506c3fb27SDimitry Andric NodePtr Accumulator = nullptr) { 115606c3fb27SDimitry Andric if (RealMuls.size() != ImagMuls.size()) 115706c3fb27SDimitry Andric return nullptr; 115806c3fb27SDimitry Andric 115906c3fb27SDimitry Andric std::vector<PartialMulCandidate> Info; 116006c3fb27SDimitry Andric if (!collectPartialMuls(RealMuls, ImagMuls, Info)) 116106c3fb27SDimitry Andric return nullptr; 116206c3fb27SDimitry Andric 116306c3fb27SDimitry Andric // Map to store common instruction to node pointers 116406c3fb27SDimitry Andric std::map<Value *, NodePtr> CommonToNode; 116506c3fb27SDimitry Andric std::vector<bool> Processed(Info.size(), false); 116606c3fb27SDimitry Andric for (unsigned I = 0; I < Info.size(); ++I) { 116706c3fb27SDimitry Andric if (Processed[I]) 116806c3fb27SDimitry Andric continue; 116906c3fb27SDimitry Andric 117006c3fb27SDimitry Andric PartialMulCandidate &InfoA = Info[I]; 117106c3fb27SDimitry Andric for (unsigned J = I + 1; J < Info.size(); ++J) { 117206c3fb27SDimitry Andric if (Processed[J]) 117306c3fb27SDimitry Andric continue; 117406c3fb27SDimitry Andric 117506c3fb27SDimitry Andric PartialMulCandidate &InfoB = Info[J]; 117606c3fb27SDimitry Andric auto *InfoReal = &InfoA; 117706c3fb27SDimitry Andric auto *InfoImag = &InfoB; 117806c3fb27SDimitry Andric 117906c3fb27SDimitry Andric auto NodeFromCommon = identifyNode(InfoReal->Common, InfoImag->Common); 118006c3fb27SDimitry Andric if (!NodeFromCommon) { 118106c3fb27SDimitry Andric std::swap(InfoReal, InfoImag); 118206c3fb27SDimitry Andric NodeFromCommon = identifyNode(InfoReal->Common, InfoImag->Common); 118306c3fb27SDimitry Andric } 118406c3fb27SDimitry Andric if (!NodeFromCommon) 118506c3fb27SDimitry Andric continue; 118606c3fb27SDimitry Andric 118706c3fb27SDimitry Andric CommonToNode[InfoReal->Common] = NodeFromCommon; 118806c3fb27SDimitry Andric CommonToNode[InfoImag->Common] = NodeFromCommon; 118906c3fb27SDimitry Andric Processed[I] = true; 119006c3fb27SDimitry Andric Processed[J] = true; 119106c3fb27SDimitry Andric } 119206c3fb27SDimitry Andric } 119306c3fb27SDimitry Andric 119406c3fb27SDimitry Andric std::vector<bool> ProcessedReal(RealMuls.size(), false); 119506c3fb27SDimitry Andric std::vector<bool> ProcessedImag(ImagMuls.size(), false); 119606c3fb27SDimitry Andric NodePtr Result = Accumulator; 119706c3fb27SDimitry Andric for (auto &PMI : Info) { 119806c3fb27SDimitry Andric if (ProcessedReal[PMI.RealIdx] || ProcessedImag[PMI.ImagIdx]) 119906c3fb27SDimitry Andric continue; 120006c3fb27SDimitry Andric 120106c3fb27SDimitry Andric auto It = CommonToNode.find(PMI.Common); 120206c3fb27SDimitry Andric // TODO: Process independent complex multiplications. Cases like this: 120306c3fb27SDimitry Andric // A.real() * B where both A and B are complex numbers. 120406c3fb27SDimitry Andric if (It == CommonToNode.end()) { 120506c3fb27SDimitry Andric LLVM_DEBUG({ 120606c3fb27SDimitry Andric dbgs() << "Unprocessed independent partial multiplication:\n"; 120706c3fb27SDimitry Andric for (auto *Mul : {&RealMuls[PMI.RealIdx], &RealMuls[PMI.RealIdx]}) 120806c3fb27SDimitry Andric dbgs().indent(4) << (Mul->IsPositive ? "+" : "-") << *Mul->Multiplier 120906c3fb27SDimitry Andric << " multiplied by " << *Mul->Multiplicand << "\n"; 121006c3fb27SDimitry Andric }); 121106c3fb27SDimitry Andric return nullptr; 121206c3fb27SDimitry Andric } 121306c3fb27SDimitry Andric 121406c3fb27SDimitry Andric auto &RealMul = RealMuls[PMI.RealIdx]; 121506c3fb27SDimitry Andric auto &ImagMul = ImagMuls[PMI.ImagIdx]; 121606c3fb27SDimitry Andric 121706c3fb27SDimitry Andric auto NodeA = It->second; 121806c3fb27SDimitry Andric auto NodeB = PMI.Node; 121906c3fb27SDimitry Andric auto IsMultiplicandReal = PMI.Common == NodeA->Real; 122006c3fb27SDimitry Andric // The following table illustrates the relationship between multiplications 122106c3fb27SDimitry Andric // and rotations. If we consider the multiplication (X + iY) * (U + iV), we 122206c3fb27SDimitry Andric // can see: 122306c3fb27SDimitry Andric // 122406c3fb27SDimitry Andric // Rotation | Real | Imag | 122506c3fb27SDimitry Andric // ---------+--------+--------+ 122606c3fb27SDimitry Andric // 0 | x * u | x * v | 122706c3fb27SDimitry Andric // 90 | -y * v | y * u | 122806c3fb27SDimitry Andric // 180 | -x * u | -x * v | 122906c3fb27SDimitry Andric // 270 | y * v | -y * u | 123006c3fb27SDimitry Andric // 123106c3fb27SDimitry Andric // Check if the candidate can indeed be represented by partial 123206c3fb27SDimitry Andric // multiplication 123306c3fb27SDimitry Andric // TODO: Add support for multiplication by complex one 123406c3fb27SDimitry Andric if ((IsMultiplicandReal && PMI.IsNodeInverted) || 123506c3fb27SDimitry Andric (!IsMultiplicandReal && !PMI.IsNodeInverted)) 123606c3fb27SDimitry Andric continue; 123706c3fb27SDimitry Andric 123806c3fb27SDimitry Andric // Determine the rotation based on the multiplications 123906c3fb27SDimitry Andric ComplexDeinterleavingRotation Rotation; 124006c3fb27SDimitry Andric if (IsMultiplicandReal) { 124106c3fb27SDimitry Andric // Detect 0 and 180 degrees rotation 124206c3fb27SDimitry Andric if (RealMul.IsPositive && ImagMul.IsPositive) 124306c3fb27SDimitry Andric Rotation = llvm::ComplexDeinterleavingRotation::Rotation_0; 124406c3fb27SDimitry Andric else if (!RealMul.IsPositive && !ImagMul.IsPositive) 124506c3fb27SDimitry Andric Rotation = llvm::ComplexDeinterleavingRotation::Rotation_180; 124606c3fb27SDimitry Andric else 124706c3fb27SDimitry Andric continue; 124806c3fb27SDimitry Andric 124906c3fb27SDimitry Andric } else { 125006c3fb27SDimitry Andric // Detect 90 and 270 degrees rotation 125106c3fb27SDimitry Andric if (!RealMul.IsPositive && ImagMul.IsPositive) 125206c3fb27SDimitry Andric Rotation = llvm::ComplexDeinterleavingRotation::Rotation_90; 125306c3fb27SDimitry Andric else if (RealMul.IsPositive && !ImagMul.IsPositive) 125406c3fb27SDimitry Andric Rotation = llvm::ComplexDeinterleavingRotation::Rotation_270; 125506c3fb27SDimitry Andric else 125606c3fb27SDimitry Andric continue; 125706c3fb27SDimitry Andric } 125806c3fb27SDimitry Andric 125906c3fb27SDimitry Andric LLVM_DEBUG({ 126006c3fb27SDimitry Andric dbgs() << "Identified partial multiplication (X, Y) * (U, V):\n"; 126106c3fb27SDimitry Andric dbgs().indent(4) << "X: " << *NodeA->Real << "\n"; 126206c3fb27SDimitry Andric dbgs().indent(4) << "Y: " << *NodeA->Imag << "\n"; 126306c3fb27SDimitry Andric dbgs().indent(4) << "U: " << *NodeB->Real << "\n"; 126406c3fb27SDimitry Andric dbgs().indent(4) << "V: " << *NodeB->Imag << "\n"; 126506c3fb27SDimitry Andric dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n"; 126606c3fb27SDimitry Andric }); 126706c3fb27SDimitry Andric 126806c3fb27SDimitry Andric NodePtr NodeMul = prepareCompositeNode( 126906c3fb27SDimitry Andric ComplexDeinterleavingOperation::CMulPartial, nullptr, nullptr); 127006c3fb27SDimitry Andric NodeMul->Rotation = Rotation; 127106c3fb27SDimitry Andric NodeMul->addOperand(NodeA); 127206c3fb27SDimitry Andric NodeMul->addOperand(NodeB); 127306c3fb27SDimitry Andric if (Result) 127406c3fb27SDimitry Andric NodeMul->addOperand(Result); 127506c3fb27SDimitry Andric submitCompositeNode(NodeMul); 127606c3fb27SDimitry Andric Result = NodeMul; 127706c3fb27SDimitry Andric ProcessedReal[PMI.RealIdx] = true; 127806c3fb27SDimitry Andric ProcessedImag[PMI.ImagIdx] = true; 127906c3fb27SDimitry Andric } 128006c3fb27SDimitry Andric 128106c3fb27SDimitry Andric // Ensure all products have been processed, if not return nullptr. 128206c3fb27SDimitry Andric if (!all_of(ProcessedReal, [](bool V) { return V; }) || 128306c3fb27SDimitry Andric !all_of(ProcessedImag, [](bool V) { return V; })) { 128406c3fb27SDimitry Andric 128506c3fb27SDimitry Andric // Dump debug information about which partial multiplications are not 128606c3fb27SDimitry Andric // processed. 128706c3fb27SDimitry Andric LLVM_DEBUG({ 128806c3fb27SDimitry Andric dbgs() << "Unprocessed products (Real):\n"; 128906c3fb27SDimitry Andric for (size_t i = 0; i < ProcessedReal.size(); ++i) { 129006c3fb27SDimitry Andric if (!ProcessedReal[i]) 129106c3fb27SDimitry Andric dbgs().indent(4) << (RealMuls[i].IsPositive ? "+" : "-") 129206c3fb27SDimitry Andric << *RealMuls[i].Multiplier << " multiplied by " 129306c3fb27SDimitry Andric << *RealMuls[i].Multiplicand << "\n"; 129406c3fb27SDimitry Andric } 129506c3fb27SDimitry Andric dbgs() << "Unprocessed products (Imag):\n"; 129606c3fb27SDimitry Andric for (size_t i = 0; i < ProcessedImag.size(); ++i) { 129706c3fb27SDimitry Andric if (!ProcessedImag[i]) 129806c3fb27SDimitry Andric dbgs().indent(4) << (ImagMuls[i].IsPositive ? "+" : "-") 129906c3fb27SDimitry Andric << *ImagMuls[i].Multiplier << " multiplied by " 130006c3fb27SDimitry Andric << *ImagMuls[i].Multiplicand << "\n"; 130106c3fb27SDimitry Andric } 130206c3fb27SDimitry Andric }); 130306c3fb27SDimitry Andric return nullptr; 130406c3fb27SDimitry Andric } 130506c3fb27SDimitry Andric 130606c3fb27SDimitry Andric return Result; 130706c3fb27SDimitry Andric } 130806c3fb27SDimitry Andric 130906c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 131006c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifyAdditions( 131106c3fb27SDimitry Andric std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends, 131206c3fb27SDimitry Andric std::optional<FastMathFlags> Flags, NodePtr Accumulator = nullptr) { 131306c3fb27SDimitry Andric if (RealAddends.size() != ImagAddends.size()) 131406c3fb27SDimitry Andric return nullptr; 131506c3fb27SDimitry Andric 131606c3fb27SDimitry Andric NodePtr Result; 131706c3fb27SDimitry Andric // If we have accumulator use it as first addend 131806c3fb27SDimitry Andric if (Accumulator) 131906c3fb27SDimitry Andric Result = Accumulator; 132006c3fb27SDimitry Andric // Otherwise find an element with both positive real and imaginary parts. 132106c3fb27SDimitry Andric else 132206c3fb27SDimitry Andric Result = extractPositiveAddend(RealAddends, ImagAddends); 132306c3fb27SDimitry Andric 132406c3fb27SDimitry Andric if (!Result) 132506c3fb27SDimitry Andric return nullptr; 132606c3fb27SDimitry Andric 132706c3fb27SDimitry Andric while (!RealAddends.empty()) { 132806c3fb27SDimitry Andric auto ItR = RealAddends.begin(); 132906c3fb27SDimitry Andric auto [R, IsPositiveR] = *ItR; 133006c3fb27SDimitry Andric 133106c3fb27SDimitry Andric bool FoundImag = false; 133206c3fb27SDimitry Andric for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) { 133306c3fb27SDimitry Andric auto [I, IsPositiveI] = *ItI; 133406c3fb27SDimitry Andric ComplexDeinterleavingRotation Rotation; 133506c3fb27SDimitry Andric if (IsPositiveR && IsPositiveI) 133606c3fb27SDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_0; 133706c3fb27SDimitry Andric else if (!IsPositiveR && IsPositiveI) 133806c3fb27SDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_90; 133906c3fb27SDimitry Andric else if (!IsPositiveR && !IsPositiveI) 134006c3fb27SDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_180; 134106c3fb27SDimitry Andric else 134206c3fb27SDimitry Andric Rotation = ComplexDeinterleavingRotation::Rotation_270; 134306c3fb27SDimitry Andric 134406c3fb27SDimitry Andric NodePtr AddNode; 134506c3fb27SDimitry Andric if (Rotation == ComplexDeinterleavingRotation::Rotation_0 || 134606c3fb27SDimitry Andric Rotation == ComplexDeinterleavingRotation::Rotation_180) { 134706c3fb27SDimitry Andric AddNode = identifyNode(R, I); 134806c3fb27SDimitry Andric } else { 134906c3fb27SDimitry Andric AddNode = identifyNode(I, R); 135006c3fb27SDimitry Andric } 135106c3fb27SDimitry Andric if (AddNode) { 135206c3fb27SDimitry Andric LLVM_DEBUG({ 135306c3fb27SDimitry Andric dbgs() << "Identified addition:\n"; 135406c3fb27SDimitry Andric dbgs().indent(4) << "X: " << *R << "\n"; 135506c3fb27SDimitry Andric dbgs().indent(4) << "Y: " << *I << "\n"; 135606c3fb27SDimitry Andric dbgs().indent(4) << "Rotation - " << (int)Rotation * 90 << "\n"; 135706c3fb27SDimitry Andric }); 135806c3fb27SDimitry Andric 135906c3fb27SDimitry Andric NodePtr TmpNode; 136006c3fb27SDimitry Andric if (Rotation == llvm::ComplexDeinterleavingRotation::Rotation_0) { 136106c3fb27SDimitry Andric TmpNode = prepareCompositeNode( 136206c3fb27SDimitry Andric ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr); 136306c3fb27SDimitry Andric if (Flags) { 136406c3fb27SDimitry Andric TmpNode->Opcode = Instruction::FAdd; 136506c3fb27SDimitry Andric TmpNode->Flags = *Flags; 136606c3fb27SDimitry Andric } else { 136706c3fb27SDimitry Andric TmpNode->Opcode = Instruction::Add; 136806c3fb27SDimitry Andric } 136906c3fb27SDimitry Andric } else if (Rotation == 137006c3fb27SDimitry Andric llvm::ComplexDeinterleavingRotation::Rotation_180) { 137106c3fb27SDimitry Andric TmpNode = prepareCompositeNode( 137206c3fb27SDimitry Andric ComplexDeinterleavingOperation::Symmetric, nullptr, nullptr); 137306c3fb27SDimitry Andric if (Flags) { 137406c3fb27SDimitry Andric TmpNode->Opcode = Instruction::FSub; 137506c3fb27SDimitry Andric TmpNode->Flags = *Flags; 137606c3fb27SDimitry Andric } else { 137706c3fb27SDimitry Andric TmpNode->Opcode = Instruction::Sub; 137806c3fb27SDimitry Andric } 137906c3fb27SDimitry Andric } else { 138006c3fb27SDimitry Andric TmpNode = prepareCompositeNode(ComplexDeinterleavingOperation::CAdd, 138106c3fb27SDimitry Andric nullptr, nullptr); 138206c3fb27SDimitry Andric TmpNode->Rotation = Rotation; 138306c3fb27SDimitry Andric } 138406c3fb27SDimitry Andric 138506c3fb27SDimitry Andric TmpNode->addOperand(Result); 138606c3fb27SDimitry Andric TmpNode->addOperand(AddNode); 138706c3fb27SDimitry Andric submitCompositeNode(TmpNode); 138806c3fb27SDimitry Andric Result = TmpNode; 138906c3fb27SDimitry Andric RealAddends.erase(ItR); 139006c3fb27SDimitry Andric ImagAddends.erase(ItI); 139106c3fb27SDimitry Andric FoundImag = true; 139206c3fb27SDimitry Andric break; 139306c3fb27SDimitry Andric } 139406c3fb27SDimitry Andric } 139506c3fb27SDimitry Andric if (!FoundImag) 139606c3fb27SDimitry Andric return nullptr; 139706c3fb27SDimitry Andric } 139806c3fb27SDimitry Andric return Result; 139906c3fb27SDimitry Andric } 140006c3fb27SDimitry Andric 140106c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 140206c3fb27SDimitry Andric ComplexDeinterleavingGraph::extractPositiveAddend( 140306c3fb27SDimitry Andric std::list<Addend> &RealAddends, std::list<Addend> &ImagAddends) { 140406c3fb27SDimitry Andric for (auto ItR = RealAddends.begin(); ItR != RealAddends.end(); ++ItR) { 140506c3fb27SDimitry Andric for (auto ItI = ImagAddends.begin(); ItI != ImagAddends.end(); ++ItI) { 140606c3fb27SDimitry Andric auto [R, IsPositiveR] = *ItR; 140706c3fb27SDimitry Andric auto [I, IsPositiveI] = *ItI; 140806c3fb27SDimitry Andric if (IsPositiveR && IsPositiveI) { 140906c3fb27SDimitry Andric auto Result = identifyNode(R, I); 141006c3fb27SDimitry Andric if (Result) { 141106c3fb27SDimitry Andric RealAddends.erase(ItR); 141206c3fb27SDimitry Andric ImagAddends.erase(ItI); 141306c3fb27SDimitry Andric return Result; 141406c3fb27SDimitry Andric } 141506c3fb27SDimitry Andric } 141606c3fb27SDimitry Andric } 141706c3fb27SDimitry Andric } 141806c3fb27SDimitry Andric return nullptr; 141906c3fb27SDimitry Andric } 142006c3fb27SDimitry Andric 142106c3fb27SDimitry Andric bool ComplexDeinterleavingGraph::identifyNodes(Instruction *RootI) { 142206c3fb27SDimitry Andric // This potential root instruction might already have been recognized as 142306c3fb27SDimitry Andric // reduction. Because RootToNode maps both Real and Imaginary parts to 142406c3fb27SDimitry Andric // CompositeNode we should choose only one either Real or Imag instruction to 142506c3fb27SDimitry Andric // use as an anchor for generating complex instruction. 142606c3fb27SDimitry Andric auto It = RootToNode.find(RootI); 1427*8a4dda33SDimitry Andric if (It != RootToNode.end()) { 1428*8a4dda33SDimitry Andric auto RootNode = It->second; 1429*8a4dda33SDimitry Andric assert(RootNode->Operation == 1430*8a4dda33SDimitry Andric ComplexDeinterleavingOperation::ReductionOperation); 1431*8a4dda33SDimitry Andric // Find out which part, Real or Imag, comes later, and only if we come to 1432*8a4dda33SDimitry Andric // the latest part, add it to OrderedRoots. 1433*8a4dda33SDimitry Andric auto *R = cast<Instruction>(RootNode->Real); 1434*8a4dda33SDimitry Andric auto *I = cast<Instruction>(RootNode->Imag); 1435*8a4dda33SDimitry Andric auto *ReplacementAnchor = R->comesBefore(I) ? I : R; 1436*8a4dda33SDimitry Andric if (ReplacementAnchor != RootI) 1437*8a4dda33SDimitry Andric return false; 143806c3fb27SDimitry Andric OrderedRoots.push_back(RootI); 143906c3fb27SDimitry Andric return true; 144006c3fb27SDimitry Andric } 144106c3fb27SDimitry Andric 144206c3fb27SDimitry Andric auto RootNode = identifyRoot(RootI); 144306c3fb27SDimitry Andric if (!RootNode) 144406c3fb27SDimitry Andric return false; 144506c3fb27SDimitry Andric 144606c3fb27SDimitry Andric LLVM_DEBUG({ 144706c3fb27SDimitry Andric Function *F = RootI->getFunction(); 144806c3fb27SDimitry Andric BasicBlock *B = RootI->getParent(); 144906c3fb27SDimitry Andric dbgs() << "Complex deinterleaving graph for " << F->getName() 145006c3fb27SDimitry Andric << "::" << B->getName() << ".\n"; 145106c3fb27SDimitry Andric dump(dbgs()); 145206c3fb27SDimitry Andric dbgs() << "\n"; 145306c3fb27SDimitry Andric }); 145406c3fb27SDimitry Andric RootToNode[RootI] = RootNode; 145506c3fb27SDimitry Andric OrderedRoots.push_back(RootI); 145606c3fb27SDimitry Andric return true; 145706c3fb27SDimitry Andric } 145806c3fb27SDimitry Andric 145906c3fb27SDimitry Andric bool ComplexDeinterleavingGraph::collectPotentialReductions(BasicBlock *B) { 146006c3fb27SDimitry Andric bool FoundPotentialReduction = false; 146106c3fb27SDimitry Andric 146206c3fb27SDimitry Andric auto *Br = dyn_cast<BranchInst>(B->getTerminator()); 146306c3fb27SDimitry Andric if (!Br || Br->getNumSuccessors() != 2) 146406c3fb27SDimitry Andric return false; 146506c3fb27SDimitry Andric 146606c3fb27SDimitry Andric // Identify simple one-block loop 146706c3fb27SDimitry Andric if (Br->getSuccessor(0) != B && Br->getSuccessor(1) != B) 146806c3fb27SDimitry Andric return false; 146906c3fb27SDimitry Andric 147006c3fb27SDimitry Andric SmallVector<PHINode *> PHIs; 147106c3fb27SDimitry Andric for (auto &PHI : B->phis()) { 147206c3fb27SDimitry Andric if (PHI.getNumIncomingValues() != 2) 147306c3fb27SDimitry Andric continue; 147406c3fb27SDimitry Andric 147506c3fb27SDimitry Andric if (!PHI.getType()->isVectorTy()) 147606c3fb27SDimitry Andric continue; 147706c3fb27SDimitry Andric 147806c3fb27SDimitry Andric auto *ReductionOp = dyn_cast<Instruction>(PHI.getIncomingValueForBlock(B)); 147906c3fb27SDimitry Andric if (!ReductionOp) 148006c3fb27SDimitry Andric continue; 148106c3fb27SDimitry Andric 148206c3fb27SDimitry Andric // Check if final instruction is reduced outside of current block 148306c3fb27SDimitry Andric Instruction *FinalReduction = nullptr; 148406c3fb27SDimitry Andric auto NumUsers = 0u; 148506c3fb27SDimitry Andric for (auto *U : ReductionOp->users()) { 148606c3fb27SDimitry Andric ++NumUsers; 148706c3fb27SDimitry Andric if (U == &PHI) 148806c3fb27SDimitry Andric continue; 148906c3fb27SDimitry Andric FinalReduction = dyn_cast<Instruction>(U); 149006c3fb27SDimitry Andric } 149106c3fb27SDimitry Andric 149206c3fb27SDimitry Andric if (NumUsers != 2 || !FinalReduction || FinalReduction->getParent() == B || 149306c3fb27SDimitry Andric isa<PHINode>(FinalReduction)) 149406c3fb27SDimitry Andric continue; 149506c3fb27SDimitry Andric 149606c3fb27SDimitry Andric ReductionInfo[ReductionOp] = {&PHI, FinalReduction}; 149706c3fb27SDimitry Andric BackEdge = B; 149806c3fb27SDimitry Andric auto BackEdgeIdx = PHI.getBasicBlockIndex(B); 149906c3fb27SDimitry Andric auto IncomingIdx = BackEdgeIdx == 0 ? 1 : 0; 150006c3fb27SDimitry Andric Incoming = PHI.getIncomingBlock(IncomingIdx); 150106c3fb27SDimitry Andric FoundPotentialReduction = true; 150206c3fb27SDimitry Andric 150306c3fb27SDimitry Andric // If the initial value of PHINode is an Instruction, consider it a leaf 150406c3fb27SDimitry Andric // value of a complex deinterleaving graph. 150506c3fb27SDimitry Andric if (auto *InitPHI = 150606c3fb27SDimitry Andric dyn_cast<Instruction>(PHI.getIncomingValueForBlock(Incoming))) 150706c3fb27SDimitry Andric FinalInstructions.insert(InitPHI); 150806c3fb27SDimitry Andric } 150906c3fb27SDimitry Andric return FoundPotentialReduction; 151006c3fb27SDimitry Andric } 151106c3fb27SDimitry Andric 151206c3fb27SDimitry Andric void ComplexDeinterleavingGraph::identifyReductionNodes() { 151306c3fb27SDimitry Andric SmallVector<bool> Processed(ReductionInfo.size(), false); 151406c3fb27SDimitry Andric SmallVector<Instruction *> OperationInstruction; 151506c3fb27SDimitry Andric for (auto &P : ReductionInfo) 151606c3fb27SDimitry Andric OperationInstruction.push_back(P.first); 151706c3fb27SDimitry Andric 151806c3fb27SDimitry Andric // Identify a complex computation by evaluating two reduction operations that 151906c3fb27SDimitry Andric // potentially could be involved 152006c3fb27SDimitry Andric for (size_t i = 0; i < OperationInstruction.size(); ++i) { 152106c3fb27SDimitry Andric if (Processed[i]) 152206c3fb27SDimitry Andric continue; 152306c3fb27SDimitry Andric for (size_t j = i + 1; j < OperationInstruction.size(); ++j) { 152406c3fb27SDimitry Andric if (Processed[j]) 152506c3fb27SDimitry Andric continue; 152606c3fb27SDimitry Andric 152706c3fb27SDimitry Andric auto *Real = OperationInstruction[i]; 152806c3fb27SDimitry Andric auto *Imag = OperationInstruction[j]; 152906c3fb27SDimitry Andric if (Real->getType() != Imag->getType()) 153006c3fb27SDimitry Andric continue; 153106c3fb27SDimitry Andric 153206c3fb27SDimitry Andric RealPHI = ReductionInfo[Real].first; 153306c3fb27SDimitry Andric ImagPHI = ReductionInfo[Imag].first; 153406c3fb27SDimitry Andric PHIsFound = false; 153506c3fb27SDimitry Andric auto Node = identifyNode(Real, Imag); 153606c3fb27SDimitry Andric if (!Node) { 153706c3fb27SDimitry Andric std::swap(Real, Imag); 153806c3fb27SDimitry Andric std::swap(RealPHI, ImagPHI); 153906c3fb27SDimitry Andric Node = identifyNode(Real, Imag); 154006c3fb27SDimitry Andric } 154106c3fb27SDimitry Andric 154206c3fb27SDimitry Andric // If a node is identified and reduction PHINode is used in the chain of 154306c3fb27SDimitry Andric // operations, mark its operation instructions as used to prevent 154406c3fb27SDimitry Andric // re-identification and attach the node to the real part 154506c3fb27SDimitry Andric if (Node && PHIsFound) { 154606c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Identified reduction starting from instructions: " 154706c3fb27SDimitry Andric << *Real << " / " << *Imag << "\n"); 154806c3fb27SDimitry Andric Processed[i] = true; 154906c3fb27SDimitry Andric Processed[j] = true; 155006c3fb27SDimitry Andric auto RootNode = prepareCompositeNode( 155106c3fb27SDimitry Andric ComplexDeinterleavingOperation::ReductionOperation, Real, Imag); 155206c3fb27SDimitry Andric RootNode->addOperand(Node); 155306c3fb27SDimitry Andric RootToNode[Real] = RootNode; 155406c3fb27SDimitry Andric RootToNode[Imag] = RootNode; 155506c3fb27SDimitry Andric submitCompositeNode(RootNode); 155606c3fb27SDimitry Andric break; 155706c3fb27SDimitry Andric } 155806c3fb27SDimitry Andric } 155906c3fb27SDimitry Andric } 156006c3fb27SDimitry Andric 156106c3fb27SDimitry Andric RealPHI = nullptr; 156206c3fb27SDimitry Andric ImagPHI = nullptr; 156306c3fb27SDimitry Andric } 156406c3fb27SDimitry Andric 156506c3fb27SDimitry Andric bool ComplexDeinterleavingGraph::checkNodes() { 156606c3fb27SDimitry Andric // Collect all instructions from roots to leaves 156706c3fb27SDimitry Andric SmallPtrSet<Instruction *, 16> AllInstructions; 156806c3fb27SDimitry Andric SmallVector<Instruction *, 8> Worklist; 156906c3fb27SDimitry Andric for (auto &Pair : RootToNode) 157006c3fb27SDimitry Andric Worklist.push_back(Pair.first); 157106c3fb27SDimitry Andric 157206c3fb27SDimitry Andric // Extract all instructions that are used by all XCMLA/XCADD/ADD/SUB/NEG 157306c3fb27SDimitry Andric // chains 157406c3fb27SDimitry Andric while (!Worklist.empty()) { 157506c3fb27SDimitry Andric auto *I = Worklist.back(); 157606c3fb27SDimitry Andric Worklist.pop_back(); 157706c3fb27SDimitry Andric 157806c3fb27SDimitry Andric if (!AllInstructions.insert(I).second) 157906c3fb27SDimitry Andric continue; 158006c3fb27SDimitry Andric 158106c3fb27SDimitry Andric for (Value *Op : I->operands()) { 158206c3fb27SDimitry Andric if (auto *OpI = dyn_cast<Instruction>(Op)) { 158306c3fb27SDimitry Andric if (!FinalInstructions.count(I)) 158406c3fb27SDimitry Andric Worklist.emplace_back(OpI); 158506c3fb27SDimitry Andric } 158606c3fb27SDimitry Andric } 158706c3fb27SDimitry Andric } 158806c3fb27SDimitry Andric 158906c3fb27SDimitry Andric // Find instructions that have users outside of chain 159006c3fb27SDimitry Andric SmallVector<Instruction *, 2> OuterInstructions; 159106c3fb27SDimitry Andric for (auto *I : AllInstructions) { 159206c3fb27SDimitry Andric // Skip root nodes 159306c3fb27SDimitry Andric if (RootToNode.count(I)) 159406c3fb27SDimitry Andric continue; 159506c3fb27SDimitry Andric 159606c3fb27SDimitry Andric for (User *U : I->users()) { 159706c3fb27SDimitry Andric if (AllInstructions.count(cast<Instruction>(U))) 159806c3fb27SDimitry Andric continue; 159906c3fb27SDimitry Andric 160006c3fb27SDimitry Andric // Found an instruction that is not used by XCMLA/XCADD chain 160106c3fb27SDimitry Andric Worklist.emplace_back(I); 160206c3fb27SDimitry Andric break; 160306c3fb27SDimitry Andric } 160406c3fb27SDimitry Andric } 160506c3fb27SDimitry Andric 160606c3fb27SDimitry Andric // If any instructions are found to be used outside, find and remove roots 160706c3fb27SDimitry Andric // that somehow connect to those instructions. 160806c3fb27SDimitry Andric SmallPtrSet<Instruction *, 16> Visited; 160906c3fb27SDimitry Andric while (!Worklist.empty()) { 161006c3fb27SDimitry Andric auto *I = Worklist.back(); 161106c3fb27SDimitry Andric Worklist.pop_back(); 161206c3fb27SDimitry Andric if (!Visited.insert(I).second) 161306c3fb27SDimitry Andric continue; 161406c3fb27SDimitry Andric 161506c3fb27SDimitry Andric // Found an impacted root node. Removing it from the nodes to be 161606c3fb27SDimitry Andric // deinterleaved 161706c3fb27SDimitry Andric if (RootToNode.count(I)) { 161806c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "Instruction " << *I 161906c3fb27SDimitry Andric << " could be deinterleaved but its chain of complex " 162006c3fb27SDimitry Andric "operations have an outside user\n"); 162106c3fb27SDimitry Andric RootToNode.erase(I); 162206c3fb27SDimitry Andric } 162306c3fb27SDimitry Andric 162406c3fb27SDimitry Andric if (!AllInstructions.count(I) || FinalInstructions.count(I)) 162506c3fb27SDimitry Andric continue; 162606c3fb27SDimitry Andric 162706c3fb27SDimitry Andric for (User *U : I->users()) 162806c3fb27SDimitry Andric Worklist.emplace_back(cast<Instruction>(U)); 162906c3fb27SDimitry Andric 163006c3fb27SDimitry Andric for (Value *Op : I->operands()) { 163106c3fb27SDimitry Andric if (auto *OpI = dyn_cast<Instruction>(Op)) 163206c3fb27SDimitry Andric Worklist.emplace_back(OpI); 163306c3fb27SDimitry Andric } 163406c3fb27SDimitry Andric } 163506c3fb27SDimitry Andric return !RootToNode.empty(); 163606c3fb27SDimitry Andric } 163706c3fb27SDimitry Andric 163806c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 163906c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifyRoot(Instruction *RootI) { 164006c3fb27SDimitry Andric if (auto *Intrinsic = dyn_cast<IntrinsicInst>(RootI)) { 164106c3fb27SDimitry Andric if (Intrinsic->getIntrinsicID() != 164206c3fb27SDimitry Andric Intrinsic::experimental_vector_interleave2) 164306c3fb27SDimitry Andric return nullptr; 164406c3fb27SDimitry Andric 164506c3fb27SDimitry Andric auto *Real = dyn_cast<Instruction>(Intrinsic->getOperand(0)); 164606c3fb27SDimitry Andric auto *Imag = dyn_cast<Instruction>(Intrinsic->getOperand(1)); 164706c3fb27SDimitry Andric if (!Real || !Imag) 164806c3fb27SDimitry Andric return nullptr; 164906c3fb27SDimitry Andric 165006c3fb27SDimitry Andric return identifyNode(Real, Imag); 165106c3fb27SDimitry Andric } 165206c3fb27SDimitry Andric 165306c3fb27SDimitry Andric auto *SVI = dyn_cast<ShuffleVectorInst>(RootI); 165406c3fb27SDimitry Andric if (!SVI) 165506c3fb27SDimitry Andric return nullptr; 165606c3fb27SDimitry Andric 165706c3fb27SDimitry Andric // Look for a shufflevector that takes separate vectors of the real and 165806c3fb27SDimitry Andric // imaginary components and recombines them into a single vector. 165906c3fb27SDimitry Andric if (!isInterleavingMask(SVI->getShuffleMask())) 166006c3fb27SDimitry Andric return nullptr; 166106c3fb27SDimitry Andric 166206c3fb27SDimitry Andric Instruction *Real; 166306c3fb27SDimitry Andric Instruction *Imag; 166406c3fb27SDimitry Andric if (!match(RootI, m_Shuffle(m_Instruction(Real), m_Instruction(Imag)))) 166506c3fb27SDimitry Andric return nullptr; 166606c3fb27SDimitry Andric 166706c3fb27SDimitry Andric return identifyNode(Real, Imag); 166806c3fb27SDimitry Andric } 166906c3fb27SDimitry Andric 167006c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 167106c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifyDeinterleave(Instruction *Real, 167206c3fb27SDimitry Andric Instruction *Imag) { 167306c3fb27SDimitry Andric Instruction *I = nullptr; 167406c3fb27SDimitry Andric Value *FinalValue = nullptr; 167506c3fb27SDimitry Andric if (match(Real, m_ExtractValue<0>(m_Instruction(I))) && 167606c3fb27SDimitry Andric match(Imag, m_ExtractValue<1>(m_Specific(I))) && 167706c3fb27SDimitry Andric match(I, m_Intrinsic<Intrinsic::experimental_vector_deinterleave2>( 167806c3fb27SDimitry Andric m_Value(FinalValue)))) { 167906c3fb27SDimitry Andric NodePtr PlaceholderNode = prepareCompositeNode( 168006c3fb27SDimitry Andric llvm::ComplexDeinterleavingOperation::Deinterleave, Real, Imag); 168106c3fb27SDimitry Andric PlaceholderNode->ReplacementNode = FinalValue; 168206c3fb27SDimitry Andric FinalInstructions.insert(Real); 168306c3fb27SDimitry Andric FinalInstructions.insert(Imag); 168406c3fb27SDimitry Andric return submitCompositeNode(PlaceholderNode); 168506c3fb27SDimitry Andric } 168606c3fb27SDimitry Andric 1687bdd1243dSDimitry Andric auto *RealShuffle = dyn_cast<ShuffleVectorInst>(Real); 1688bdd1243dSDimitry Andric auto *ImagShuffle = dyn_cast<ShuffleVectorInst>(Imag); 168906c3fb27SDimitry Andric if (!RealShuffle || !ImagShuffle) { 169006c3fb27SDimitry Andric if (RealShuffle || ImagShuffle) 169106c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << " - There's a shuffle where there shouldn't be.\n"); 169206c3fb27SDimitry Andric return nullptr; 169306c3fb27SDimitry Andric } 169406c3fb27SDimitry Andric 1695bdd1243dSDimitry Andric Value *RealOp1 = RealShuffle->getOperand(1); 1696bdd1243dSDimitry Andric if (!isa<UndefValue>(RealOp1) && !isa<ConstantAggregateZero>(RealOp1)) { 1697bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - RealOp1 is not undef or zero.\n"); 1698bdd1243dSDimitry Andric return nullptr; 1699bdd1243dSDimitry Andric } 1700bdd1243dSDimitry Andric Value *ImagOp1 = ImagShuffle->getOperand(1); 1701bdd1243dSDimitry Andric if (!isa<UndefValue>(ImagOp1) && !isa<ConstantAggregateZero>(ImagOp1)) { 1702bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - ImagOp1 is not undef or zero.\n"); 1703bdd1243dSDimitry Andric return nullptr; 1704bdd1243dSDimitry Andric } 1705bdd1243dSDimitry Andric 1706bdd1243dSDimitry Andric Value *RealOp0 = RealShuffle->getOperand(0); 1707bdd1243dSDimitry Andric Value *ImagOp0 = ImagShuffle->getOperand(0); 1708bdd1243dSDimitry Andric 1709bdd1243dSDimitry Andric if (RealOp0 != ImagOp0) { 1710bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Shuffle operands are not equal.\n"); 1711bdd1243dSDimitry Andric return nullptr; 1712bdd1243dSDimitry Andric } 1713bdd1243dSDimitry Andric 1714bdd1243dSDimitry Andric ArrayRef<int> RealMask = RealShuffle->getShuffleMask(); 1715bdd1243dSDimitry Andric ArrayRef<int> ImagMask = ImagShuffle->getShuffleMask(); 1716bdd1243dSDimitry Andric if (!isDeinterleavingMask(RealMask) || !isDeinterleavingMask(ImagMask)) { 1717bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Masks are not deinterleaving.\n"); 1718bdd1243dSDimitry Andric return nullptr; 1719bdd1243dSDimitry Andric } 1720bdd1243dSDimitry Andric 1721bdd1243dSDimitry Andric if (RealMask[0] != 0 || ImagMask[0] != 1) { 1722bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Masks do not have the correct initial value.\n"); 1723bdd1243dSDimitry Andric return nullptr; 1724bdd1243dSDimitry Andric } 1725bdd1243dSDimitry Andric 1726bdd1243dSDimitry Andric // Type checking, the shuffle type should be a vector type of the same 1727bdd1243dSDimitry Andric // scalar type, but half the size 1728bdd1243dSDimitry Andric auto CheckType = [&](ShuffleVectorInst *Shuffle) { 1729bdd1243dSDimitry Andric Value *Op = Shuffle->getOperand(0); 1730bdd1243dSDimitry Andric auto *ShuffleTy = cast<FixedVectorType>(Shuffle->getType()); 1731bdd1243dSDimitry Andric auto *OpTy = cast<FixedVectorType>(Op->getType()); 1732bdd1243dSDimitry Andric 1733bdd1243dSDimitry Andric if (OpTy->getScalarType() != ShuffleTy->getScalarType()) 1734bdd1243dSDimitry Andric return false; 1735bdd1243dSDimitry Andric if ((ShuffleTy->getNumElements() * 2) != OpTy->getNumElements()) 1736bdd1243dSDimitry Andric return false; 1737bdd1243dSDimitry Andric 1738bdd1243dSDimitry Andric return true; 1739bdd1243dSDimitry Andric }; 1740bdd1243dSDimitry Andric 1741bdd1243dSDimitry Andric auto CheckDeinterleavingShuffle = [&](ShuffleVectorInst *Shuffle) -> bool { 1742bdd1243dSDimitry Andric if (!CheckType(Shuffle)) 1743bdd1243dSDimitry Andric return false; 1744bdd1243dSDimitry Andric 1745bdd1243dSDimitry Andric ArrayRef<int> Mask = Shuffle->getShuffleMask(); 1746bdd1243dSDimitry Andric int Last = *Mask.rbegin(); 1747bdd1243dSDimitry Andric 1748bdd1243dSDimitry Andric Value *Op = Shuffle->getOperand(0); 1749bdd1243dSDimitry Andric auto *OpTy = cast<FixedVectorType>(Op->getType()); 1750bdd1243dSDimitry Andric int NumElements = OpTy->getNumElements(); 1751bdd1243dSDimitry Andric 1752bdd1243dSDimitry Andric // Ensure that the deinterleaving shuffle only pulls from the first 1753bdd1243dSDimitry Andric // shuffle operand. 1754bdd1243dSDimitry Andric return Last < NumElements; 1755bdd1243dSDimitry Andric }; 1756bdd1243dSDimitry Andric 1757bdd1243dSDimitry Andric if (RealShuffle->getType() != ImagShuffle->getType()) { 1758bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - Shuffle types aren't equal.\n"); 1759bdd1243dSDimitry Andric return nullptr; 1760bdd1243dSDimitry Andric } 1761bdd1243dSDimitry Andric if (!CheckDeinterleavingShuffle(RealShuffle)) { 1762bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - RealShuffle is invalid type.\n"); 1763bdd1243dSDimitry Andric return nullptr; 1764bdd1243dSDimitry Andric } 1765bdd1243dSDimitry Andric if (!CheckDeinterleavingShuffle(ImagShuffle)) { 1766bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " - ImagShuffle is invalid type.\n"); 1767bdd1243dSDimitry Andric return nullptr; 1768bdd1243dSDimitry Andric } 1769bdd1243dSDimitry Andric 1770bdd1243dSDimitry Andric NodePtr PlaceholderNode = 177106c3fb27SDimitry Andric prepareCompositeNode(llvm::ComplexDeinterleavingOperation::Deinterleave, 1772bdd1243dSDimitry Andric RealShuffle, ImagShuffle); 1773bdd1243dSDimitry Andric PlaceholderNode->ReplacementNode = RealShuffle->getOperand(0); 177406c3fb27SDimitry Andric FinalInstructions.insert(RealShuffle); 177506c3fb27SDimitry Andric FinalInstructions.insert(ImagShuffle); 1776bdd1243dSDimitry Andric return submitCompositeNode(PlaceholderNode); 1777bdd1243dSDimitry Andric } 1778bdd1243dSDimitry Andric 177906c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 178006c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifySplat(Value *R, Value *I) { 178106c3fb27SDimitry Andric auto IsSplat = [](Value *V) -> bool { 178206c3fb27SDimitry Andric // Fixed-width vector with constants 178306c3fb27SDimitry Andric if (isa<ConstantDataVector>(V)) 178406c3fb27SDimitry Andric return true; 1785bdd1243dSDimitry Andric 178606c3fb27SDimitry Andric VectorType *VTy; 178706c3fb27SDimitry Andric ArrayRef<int> Mask; 178806c3fb27SDimitry Andric // Splats are represented differently depending on whether the repeated 178906c3fb27SDimitry Andric // value is a constant or an Instruction 179006c3fb27SDimitry Andric if (auto *Const = dyn_cast<ConstantExpr>(V)) { 179106c3fb27SDimitry Andric if (Const->getOpcode() != Instruction::ShuffleVector) 1792bdd1243dSDimitry Andric return false; 179306c3fb27SDimitry Andric VTy = cast<VectorType>(Const->getType()); 179406c3fb27SDimitry Andric Mask = Const->getShuffleMask(); 179506c3fb27SDimitry Andric } else if (auto *Shuf = dyn_cast<ShuffleVectorInst>(V)) { 179606c3fb27SDimitry Andric VTy = Shuf->getType(); 179706c3fb27SDimitry Andric Mask = Shuf->getShuffleMask(); 179806c3fb27SDimitry Andric } else { 1799bdd1243dSDimitry Andric return false; 1800bdd1243dSDimitry Andric } 180106c3fb27SDimitry Andric 180206c3fb27SDimitry Andric // When the data type is <1 x Type>, it's not possible to differentiate 180306c3fb27SDimitry Andric // between the ComplexDeinterleaving::Deinterleave and 180406c3fb27SDimitry Andric // ComplexDeinterleaving::Splat operations. 180506c3fb27SDimitry Andric if (!VTy->isScalableTy() && VTy->getElementCount().getKnownMinValue() == 1) 180606c3fb27SDimitry Andric return false; 180706c3fb27SDimitry Andric 180806c3fb27SDimitry Andric return all_equal(Mask) && Mask[0] == 0; 180906c3fb27SDimitry Andric }; 181006c3fb27SDimitry Andric 181106c3fb27SDimitry Andric if (!IsSplat(R) || !IsSplat(I)) 181206c3fb27SDimitry Andric return nullptr; 181306c3fb27SDimitry Andric 181406c3fb27SDimitry Andric auto *Real = dyn_cast<Instruction>(R); 181506c3fb27SDimitry Andric auto *Imag = dyn_cast<Instruction>(I); 181606c3fb27SDimitry Andric if ((!Real && Imag) || (Real && !Imag)) 181706c3fb27SDimitry Andric return nullptr; 181806c3fb27SDimitry Andric 181906c3fb27SDimitry Andric if (Real && Imag) { 182006c3fb27SDimitry Andric // Non-constant splats should be in the same basic block 182106c3fb27SDimitry Andric if (Real->getParent() != Imag->getParent()) 182206c3fb27SDimitry Andric return nullptr; 182306c3fb27SDimitry Andric 182406c3fb27SDimitry Andric FinalInstructions.insert(Real); 182506c3fb27SDimitry Andric FinalInstructions.insert(Imag); 1826bdd1243dSDimitry Andric } 182706c3fb27SDimitry Andric NodePtr PlaceholderNode = 182806c3fb27SDimitry Andric prepareCompositeNode(ComplexDeinterleavingOperation::Splat, R, I); 182906c3fb27SDimitry Andric return submitCompositeNode(PlaceholderNode); 1830bdd1243dSDimitry Andric } 1831bdd1243dSDimitry Andric 183206c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 183306c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifyPHINode(Instruction *Real, 183406c3fb27SDimitry Andric Instruction *Imag) { 183506c3fb27SDimitry Andric if (Real != RealPHI || Imag != ImagPHI) 183606c3fb27SDimitry Andric return nullptr; 183706c3fb27SDimitry Andric 183806c3fb27SDimitry Andric PHIsFound = true; 183906c3fb27SDimitry Andric NodePtr PlaceholderNode = prepareCompositeNode( 184006c3fb27SDimitry Andric ComplexDeinterleavingOperation::ReductionPHI, Real, Imag); 184106c3fb27SDimitry Andric return submitCompositeNode(PlaceholderNode); 184206c3fb27SDimitry Andric } 184306c3fb27SDimitry Andric 184406c3fb27SDimitry Andric ComplexDeinterleavingGraph::NodePtr 184506c3fb27SDimitry Andric ComplexDeinterleavingGraph::identifySelectNode(Instruction *Real, 184606c3fb27SDimitry Andric Instruction *Imag) { 184706c3fb27SDimitry Andric auto *SelectReal = dyn_cast<SelectInst>(Real); 184806c3fb27SDimitry Andric auto *SelectImag = dyn_cast<SelectInst>(Imag); 184906c3fb27SDimitry Andric if (!SelectReal || !SelectImag) 185006c3fb27SDimitry Andric return nullptr; 185106c3fb27SDimitry Andric 185206c3fb27SDimitry Andric Instruction *MaskA, *MaskB; 185306c3fb27SDimitry Andric Instruction *AR, *AI, *RA, *BI; 185406c3fb27SDimitry Andric if (!match(Real, m_Select(m_Instruction(MaskA), m_Instruction(AR), 185506c3fb27SDimitry Andric m_Instruction(RA))) || 185606c3fb27SDimitry Andric !match(Imag, m_Select(m_Instruction(MaskB), m_Instruction(AI), 185706c3fb27SDimitry Andric m_Instruction(BI)))) 185806c3fb27SDimitry Andric return nullptr; 185906c3fb27SDimitry Andric 186006c3fb27SDimitry Andric if (MaskA != MaskB && !MaskA->isIdenticalTo(MaskB)) 186106c3fb27SDimitry Andric return nullptr; 186206c3fb27SDimitry Andric 186306c3fb27SDimitry Andric if (!MaskA->getType()->isVectorTy()) 186406c3fb27SDimitry Andric return nullptr; 186506c3fb27SDimitry Andric 186606c3fb27SDimitry Andric auto NodeA = identifyNode(AR, AI); 186706c3fb27SDimitry Andric if (!NodeA) 186806c3fb27SDimitry Andric return nullptr; 186906c3fb27SDimitry Andric 187006c3fb27SDimitry Andric auto NodeB = identifyNode(RA, BI); 187106c3fb27SDimitry Andric if (!NodeB) 187206c3fb27SDimitry Andric return nullptr; 187306c3fb27SDimitry Andric 187406c3fb27SDimitry Andric NodePtr PlaceholderNode = prepareCompositeNode( 187506c3fb27SDimitry Andric ComplexDeinterleavingOperation::ReductionSelect, Real, Imag); 187606c3fb27SDimitry Andric PlaceholderNode->addOperand(NodeA); 187706c3fb27SDimitry Andric PlaceholderNode->addOperand(NodeB); 187806c3fb27SDimitry Andric FinalInstructions.insert(MaskA); 187906c3fb27SDimitry Andric FinalInstructions.insert(MaskB); 188006c3fb27SDimitry Andric return submitCompositeNode(PlaceholderNode); 188106c3fb27SDimitry Andric } 188206c3fb27SDimitry Andric 188306c3fb27SDimitry Andric static Value *replaceSymmetricNode(IRBuilderBase &B, unsigned Opcode, 188406c3fb27SDimitry Andric std::optional<FastMathFlags> Flags, 188506c3fb27SDimitry Andric Value *InputA, Value *InputB) { 188606c3fb27SDimitry Andric Value *I; 188706c3fb27SDimitry Andric switch (Opcode) { 188806c3fb27SDimitry Andric case Instruction::FNeg: 188906c3fb27SDimitry Andric I = B.CreateFNeg(InputA); 189006c3fb27SDimitry Andric break; 189106c3fb27SDimitry Andric case Instruction::FAdd: 189206c3fb27SDimitry Andric I = B.CreateFAdd(InputA, InputB); 189306c3fb27SDimitry Andric break; 189406c3fb27SDimitry Andric case Instruction::Add: 189506c3fb27SDimitry Andric I = B.CreateAdd(InputA, InputB); 189606c3fb27SDimitry Andric break; 189706c3fb27SDimitry Andric case Instruction::FSub: 189806c3fb27SDimitry Andric I = B.CreateFSub(InputA, InputB); 189906c3fb27SDimitry Andric break; 190006c3fb27SDimitry Andric case Instruction::Sub: 190106c3fb27SDimitry Andric I = B.CreateSub(InputA, InputB); 190206c3fb27SDimitry Andric break; 190306c3fb27SDimitry Andric case Instruction::FMul: 190406c3fb27SDimitry Andric I = B.CreateFMul(InputA, InputB); 190506c3fb27SDimitry Andric break; 190606c3fb27SDimitry Andric case Instruction::Mul: 190706c3fb27SDimitry Andric I = B.CreateMul(InputA, InputB); 190806c3fb27SDimitry Andric break; 190906c3fb27SDimitry Andric default: 191006c3fb27SDimitry Andric llvm_unreachable("Incorrect symmetric opcode"); 191106c3fb27SDimitry Andric } 191206c3fb27SDimitry Andric if (Flags) 191306c3fb27SDimitry Andric cast<Instruction>(I)->setFastMathFlags(*Flags); 191406c3fb27SDimitry Andric return I; 191506c3fb27SDimitry Andric } 191606c3fb27SDimitry Andric 191706c3fb27SDimitry Andric Value *ComplexDeinterleavingGraph::replaceNode(IRBuilderBase &Builder, 191806c3fb27SDimitry Andric RawNodePtr Node) { 1919bdd1243dSDimitry Andric if (Node->ReplacementNode) 1920bdd1243dSDimitry Andric return Node->ReplacementNode; 1921bdd1243dSDimitry Andric 192206c3fb27SDimitry Andric auto ReplaceOperandIfExist = [&](RawNodePtr &Node, unsigned Idx) -> Value * { 192306c3fb27SDimitry Andric return Node->Operands.size() > Idx 192406c3fb27SDimitry Andric ? replaceNode(Builder, Node->Operands[Idx]) 192506c3fb27SDimitry Andric : nullptr; 192606c3fb27SDimitry Andric }; 1927bdd1243dSDimitry Andric 192806c3fb27SDimitry Andric Value *ReplacementNode; 192906c3fb27SDimitry Andric switch (Node->Operation) { 193006c3fb27SDimitry Andric case ComplexDeinterleavingOperation::CAdd: 193106c3fb27SDimitry Andric case ComplexDeinterleavingOperation::CMulPartial: 193206c3fb27SDimitry Andric case ComplexDeinterleavingOperation::Symmetric: { 193306c3fb27SDimitry Andric Value *Input0 = ReplaceOperandIfExist(Node, 0); 193406c3fb27SDimitry Andric Value *Input1 = ReplaceOperandIfExist(Node, 1); 193506c3fb27SDimitry Andric Value *Accumulator = ReplaceOperandIfExist(Node, 2); 193606c3fb27SDimitry Andric assert(!Input1 || (Input0->getType() == Input1->getType() && 193706c3fb27SDimitry Andric "Node inputs need to be of the same type")); 193806c3fb27SDimitry Andric assert(!Accumulator || 193906c3fb27SDimitry Andric (Input0->getType() == Accumulator->getType() && 194006c3fb27SDimitry Andric "Accumulator and input need to be of the same type")); 194106c3fb27SDimitry Andric if (Node->Operation == ComplexDeinterleavingOperation::Symmetric) 194206c3fb27SDimitry Andric ReplacementNode = replaceSymmetricNode(Builder, Node->Opcode, Node->Flags, 194306c3fb27SDimitry Andric Input0, Input1); 194406c3fb27SDimitry Andric else 194506c3fb27SDimitry Andric ReplacementNode = TL->createComplexDeinterleavingIR( 194606c3fb27SDimitry Andric Builder, Node->Operation, Node->Rotation, Input0, Input1, 194706c3fb27SDimitry Andric Accumulator); 194806c3fb27SDimitry Andric break; 194906c3fb27SDimitry Andric } 195006c3fb27SDimitry Andric case ComplexDeinterleavingOperation::Deinterleave: 195106c3fb27SDimitry Andric llvm_unreachable("Deinterleave node should already have ReplacementNode"); 195206c3fb27SDimitry Andric break; 195306c3fb27SDimitry Andric case ComplexDeinterleavingOperation::Splat: { 195406c3fb27SDimitry Andric auto *NewTy = VectorType::getDoubleElementsVectorType( 195506c3fb27SDimitry Andric cast<VectorType>(Node->Real->getType())); 195606c3fb27SDimitry Andric auto *R = dyn_cast<Instruction>(Node->Real); 195706c3fb27SDimitry Andric auto *I = dyn_cast<Instruction>(Node->Imag); 195806c3fb27SDimitry Andric if (R && I) { 195906c3fb27SDimitry Andric // Splats that are not constant are interleaved where they are located 196006c3fb27SDimitry Andric Instruction *InsertPoint = (I->comesBefore(R) ? R : I)->getNextNode(); 196106c3fb27SDimitry Andric IRBuilder<> IRB(InsertPoint); 196206c3fb27SDimitry Andric ReplacementNode = 196306c3fb27SDimitry Andric IRB.CreateIntrinsic(Intrinsic::experimental_vector_interleave2, NewTy, 196406c3fb27SDimitry Andric {Node->Real, Node->Imag}); 196506c3fb27SDimitry Andric } else { 196606c3fb27SDimitry Andric ReplacementNode = 196706c3fb27SDimitry Andric Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2, 196806c3fb27SDimitry Andric NewTy, {Node->Real, Node->Imag}); 196906c3fb27SDimitry Andric } 197006c3fb27SDimitry Andric break; 197106c3fb27SDimitry Andric } 197206c3fb27SDimitry Andric case ComplexDeinterleavingOperation::ReductionPHI: { 197306c3fb27SDimitry Andric // If Operation is ReductionPHI, a new empty PHINode is created. 197406c3fb27SDimitry Andric // It is filled later when the ReductionOperation is processed. 197506c3fb27SDimitry Andric auto *VTy = cast<VectorType>(Node->Real->getType()); 197606c3fb27SDimitry Andric auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy); 197706c3fb27SDimitry Andric auto *NewPHI = PHINode::Create(NewVTy, 0, "", BackEdge->getFirstNonPHI()); 197806c3fb27SDimitry Andric OldToNewPHI[dyn_cast<PHINode>(Node->Real)] = NewPHI; 197906c3fb27SDimitry Andric ReplacementNode = NewPHI; 198006c3fb27SDimitry Andric break; 198106c3fb27SDimitry Andric } 198206c3fb27SDimitry Andric case ComplexDeinterleavingOperation::ReductionOperation: 198306c3fb27SDimitry Andric ReplacementNode = replaceNode(Builder, Node->Operands[0]); 198406c3fb27SDimitry Andric processReductionOperation(ReplacementNode, Node); 198506c3fb27SDimitry Andric break; 198606c3fb27SDimitry Andric case ComplexDeinterleavingOperation::ReductionSelect: { 198706c3fb27SDimitry Andric auto *MaskReal = cast<Instruction>(Node->Real)->getOperand(0); 198806c3fb27SDimitry Andric auto *MaskImag = cast<Instruction>(Node->Imag)->getOperand(0); 198906c3fb27SDimitry Andric auto *A = replaceNode(Builder, Node->Operands[0]); 199006c3fb27SDimitry Andric auto *B = replaceNode(Builder, Node->Operands[1]); 199106c3fb27SDimitry Andric auto *NewMaskTy = VectorType::getDoubleElementsVectorType( 199206c3fb27SDimitry Andric cast<VectorType>(MaskReal->getType())); 199306c3fb27SDimitry Andric auto *NewMask = 199406c3fb27SDimitry Andric Builder.CreateIntrinsic(Intrinsic::experimental_vector_interleave2, 199506c3fb27SDimitry Andric NewMaskTy, {MaskReal, MaskImag}); 199606c3fb27SDimitry Andric ReplacementNode = Builder.CreateSelect(NewMask, A, B); 199706c3fb27SDimitry Andric break; 199806c3fb27SDimitry Andric } 199906c3fb27SDimitry Andric } 2000bdd1243dSDimitry Andric 200106c3fb27SDimitry Andric assert(ReplacementNode && "Target failed to create Intrinsic call."); 2002bdd1243dSDimitry Andric NumComplexTransformations += 1; 200306c3fb27SDimitry Andric Node->ReplacementNode = ReplacementNode; 200406c3fb27SDimitry Andric return ReplacementNode; 200506c3fb27SDimitry Andric } 200606c3fb27SDimitry Andric 200706c3fb27SDimitry Andric void ComplexDeinterleavingGraph::processReductionOperation( 200806c3fb27SDimitry Andric Value *OperationReplacement, RawNodePtr Node) { 200906c3fb27SDimitry Andric auto *Real = cast<Instruction>(Node->Real); 201006c3fb27SDimitry Andric auto *Imag = cast<Instruction>(Node->Imag); 201106c3fb27SDimitry Andric auto *OldPHIReal = ReductionInfo[Real].first; 201206c3fb27SDimitry Andric auto *OldPHIImag = ReductionInfo[Imag].first; 201306c3fb27SDimitry Andric auto *NewPHI = OldToNewPHI[OldPHIReal]; 201406c3fb27SDimitry Andric 201506c3fb27SDimitry Andric auto *VTy = cast<VectorType>(Real->getType()); 201606c3fb27SDimitry Andric auto *NewVTy = VectorType::getDoubleElementsVectorType(VTy); 201706c3fb27SDimitry Andric 201806c3fb27SDimitry Andric // We have to interleave initial origin values coming from IncomingBlock 201906c3fb27SDimitry Andric Value *InitReal = OldPHIReal->getIncomingValueForBlock(Incoming); 202006c3fb27SDimitry Andric Value *InitImag = OldPHIImag->getIncomingValueForBlock(Incoming); 202106c3fb27SDimitry Andric 202206c3fb27SDimitry Andric IRBuilder<> Builder(Incoming->getTerminator()); 202306c3fb27SDimitry Andric auto *NewInit = Builder.CreateIntrinsic( 202406c3fb27SDimitry Andric Intrinsic::experimental_vector_interleave2, NewVTy, {InitReal, InitImag}); 202506c3fb27SDimitry Andric 202606c3fb27SDimitry Andric NewPHI->addIncoming(NewInit, Incoming); 202706c3fb27SDimitry Andric NewPHI->addIncoming(OperationReplacement, BackEdge); 202806c3fb27SDimitry Andric 202906c3fb27SDimitry Andric // Deinterleave complex vector outside of loop so that it can be finally 203006c3fb27SDimitry Andric // reduced 203106c3fb27SDimitry Andric auto *FinalReductionReal = ReductionInfo[Real].second; 203206c3fb27SDimitry Andric auto *FinalReductionImag = ReductionInfo[Imag].second; 203306c3fb27SDimitry Andric 203406c3fb27SDimitry Andric Builder.SetInsertPoint( 203506c3fb27SDimitry Andric &*FinalReductionReal->getParent()->getFirstInsertionPt()); 203606c3fb27SDimitry Andric auto *Deinterleave = Builder.CreateIntrinsic( 203706c3fb27SDimitry Andric Intrinsic::experimental_vector_deinterleave2, 203806c3fb27SDimitry Andric OperationReplacement->getType(), OperationReplacement); 203906c3fb27SDimitry Andric 204006c3fb27SDimitry Andric auto *NewReal = Builder.CreateExtractValue(Deinterleave, (uint64_t)0); 204106c3fb27SDimitry Andric FinalReductionReal->replaceUsesOfWith(Real, NewReal); 204206c3fb27SDimitry Andric 204306c3fb27SDimitry Andric Builder.SetInsertPoint(FinalReductionImag); 204406c3fb27SDimitry Andric auto *NewImag = Builder.CreateExtractValue(Deinterleave, 1); 204506c3fb27SDimitry Andric FinalReductionImag->replaceUsesOfWith(Imag, NewImag); 2046bdd1243dSDimitry Andric } 2047bdd1243dSDimitry Andric 2048bdd1243dSDimitry Andric void ComplexDeinterleavingGraph::replaceNodes() { 204906c3fb27SDimitry Andric SmallVector<Instruction *, 16> DeadInstrRoots; 205006c3fb27SDimitry Andric for (auto *RootInstruction : OrderedRoots) { 205106c3fb27SDimitry Andric // Check if this potential root went through check process and we can 205206c3fb27SDimitry Andric // deinterleave it 205306c3fb27SDimitry Andric if (!RootToNode.count(RootInstruction)) 205406c3fb27SDimitry Andric continue; 205506c3fb27SDimitry Andric 205606c3fb27SDimitry Andric IRBuilder<> Builder(RootInstruction); 205706c3fb27SDimitry Andric auto RootNode = RootToNode[RootInstruction]; 205806c3fb27SDimitry Andric Value *R = replaceNode(Builder, RootNode.get()); 205906c3fb27SDimitry Andric 206006c3fb27SDimitry Andric if (RootNode->Operation == 206106c3fb27SDimitry Andric ComplexDeinterleavingOperation::ReductionOperation) { 206206c3fb27SDimitry Andric auto *RootReal = cast<Instruction>(RootNode->Real); 206306c3fb27SDimitry Andric auto *RootImag = cast<Instruction>(RootNode->Imag); 206406c3fb27SDimitry Andric ReductionInfo[RootReal].first->removeIncomingValue(BackEdge); 206506c3fb27SDimitry Andric ReductionInfo[RootImag].first->removeIncomingValue(BackEdge); 206606c3fb27SDimitry Andric DeadInstrRoots.push_back(cast<Instruction>(RootReal)); 206706c3fb27SDimitry Andric DeadInstrRoots.push_back(cast<Instruction>(RootImag)); 206806c3fb27SDimitry Andric } else { 206906c3fb27SDimitry Andric assert(R && "Unable to find replacement for RootInstruction"); 207006c3fb27SDimitry Andric DeadInstrRoots.push_back(RootInstruction); 207106c3fb27SDimitry Andric RootInstruction->replaceAllUsesWith(R); 207206c3fb27SDimitry Andric } 2073bdd1243dSDimitry Andric } 2074bdd1243dSDimitry Andric 207506c3fb27SDimitry Andric for (auto *I : DeadInstrRoots) 207606c3fb27SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(I, TLI); 2077bdd1243dSDimitry Andric } 2078