//===- llvm/Support/SuffixTree.cpp - Implement Suffix Tree ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the Suffix Tree class.
//
//===----------------------------------------------------------------------===//

#include "llvm/Support/SuffixTree.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/SuffixTreeNode.h"

using namespace llvm;

/// \returns the number of elements in the substring associated with \p N.
2106c3fb27SDimitry Andric static size_t numElementsInSubstring(const SuffixTreeNode *N) { 2206c3fb27SDimitry Andric assert(N && "Got a null node?"); 2306c3fb27SDimitry Andric if (auto *Internal = dyn_cast<SuffixTreeInternalNode>(N)) 2406c3fb27SDimitry Andric if (Internal->isRoot()) 2506c3fb27SDimitry Andric return 0; 2606c3fb27SDimitry Andric return N->getEndIdx() - N->getStartIdx() + 1; 2706c3fb27SDimitry Andric } 2806c3fb27SDimitry Andric 29*0fca6ea1SDimitry Andric SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str, 30*0fca6ea1SDimitry Andric bool OutlinerLeafDescendants) 31*0fca6ea1SDimitry Andric : Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) { 3206c3fb27SDimitry Andric Root = insertRoot(); 335ffd83dbSDimitry Andric Active.Node = Root; 345ffd83dbSDimitry Andric 355ffd83dbSDimitry Andric // Keep track of the number of suffixes we have to add of the current 365ffd83dbSDimitry Andric // prefix. 375ffd83dbSDimitry Andric unsigned SuffixesToAdd = 0; 385ffd83dbSDimitry Andric 395ffd83dbSDimitry Andric // Construct the suffix tree iteratively on each prefix of the string. 405ffd83dbSDimitry Andric // PfxEndIdx is the end index of the current prefix. 415ffd83dbSDimitry Andric // End is one past the last element in the string. 425ffd83dbSDimitry Andric for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) { 435ffd83dbSDimitry Andric SuffixesToAdd++; 445ffd83dbSDimitry Andric LeafEndIdx = PfxEndIdx; // Extend each of the leaves. 455ffd83dbSDimitry Andric SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd); 465ffd83dbSDimitry Andric } 475ffd83dbSDimitry Andric 485ffd83dbSDimitry Andric // Set the suffix indices of each leaf. 495ffd83dbSDimitry Andric assert(Root && "Root node can't be nullptr!"); 505ffd83dbSDimitry Andric setSuffixIndices(); 51*0fca6ea1SDimitry Andric 52*0fca6ea1SDimitry Andric // Collect all leaf nodes of the suffix tree. 
And for each internal node, 53*0fca6ea1SDimitry Andric // record the range of leaf nodes that are descendants of it. 54*0fca6ea1SDimitry Andric if (OutlinerLeafDescendants) 55*0fca6ea1SDimitry Andric setLeafNodes(); 565ffd83dbSDimitry Andric } 575ffd83dbSDimitry Andric 5806c3fb27SDimitry Andric SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent, 595ffd83dbSDimitry Andric unsigned StartIdx, unsigned Edge) { 605ffd83dbSDimitry Andric assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); 6106c3fb27SDimitry Andric auto *N = new (LeafNodeAllocator.Allocate()) 6206c3fb27SDimitry Andric SuffixTreeLeafNode(StartIdx, &LeafEndIdx); 635ffd83dbSDimitry Andric Parent.Children[Edge] = N; 645ffd83dbSDimitry Andric return N; 655ffd83dbSDimitry Andric } 665ffd83dbSDimitry Andric 6706c3fb27SDimitry Andric SuffixTreeInternalNode * 6806c3fb27SDimitry Andric SuffixTree::insertInternalNode(SuffixTreeInternalNode *Parent, 6906c3fb27SDimitry Andric unsigned StartIdx, unsigned EndIdx, 7006c3fb27SDimitry Andric unsigned Edge) { 715ffd83dbSDimitry Andric assert(StartIdx <= EndIdx && "String can't start after it ends!"); 7206c3fb27SDimitry Andric assert(!(!Parent && StartIdx != SuffixTreeNode::EmptyIdx) && 735ffd83dbSDimitry Andric "Non-root internal nodes must have parents!"); 7406c3fb27SDimitry Andric auto *N = new (InternalNodeAllocator.Allocate()) 7506c3fb27SDimitry Andric SuffixTreeInternalNode(StartIdx, EndIdx, Root); 765ffd83dbSDimitry Andric if (Parent) 775ffd83dbSDimitry Andric Parent->Children[Edge] = N; 785ffd83dbSDimitry Andric return N; 795ffd83dbSDimitry Andric } 805ffd83dbSDimitry Andric 8106c3fb27SDimitry Andric SuffixTreeInternalNode *SuffixTree::insertRoot() { 8206c3fb27SDimitry Andric return insertInternalNode(/*Parent = */ nullptr, SuffixTreeNode::EmptyIdx, 8306c3fb27SDimitry Andric SuffixTreeNode::EmptyIdx, /*Edge = */ 0); 8406c3fb27SDimitry Andric } 8506c3fb27SDimitry Andric 865ffd83dbSDimitry Andric void SuffixTree::setSuffixIndices() 
{ 875ffd83dbSDimitry Andric // List of nodes we need to visit along with the current length of the 885ffd83dbSDimitry Andric // string. 8906c3fb27SDimitry Andric SmallVector<std::pair<SuffixTreeNode *, unsigned>> ToVisit; 905ffd83dbSDimitry Andric 915ffd83dbSDimitry Andric // Current node being visited. 925ffd83dbSDimitry Andric SuffixTreeNode *CurrNode = Root; 935ffd83dbSDimitry Andric 945ffd83dbSDimitry Andric // Sum of the lengths of the nodes down the path to the current one. 955ffd83dbSDimitry Andric unsigned CurrNodeLen = 0; 965ffd83dbSDimitry Andric ToVisit.push_back({CurrNode, CurrNodeLen}); 975ffd83dbSDimitry Andric while (!ToVisit.empty()) { 985ffd83dbSDimitry Andric std::tie(CurrNode, CurrNodeLen) = ToVisit.back(); 995ffd83dbSDimitry Andric ToVisit.pop_back(); 10006c3fb27SDimitry Andric // Length of the current node from the root down to here. 10106c3fb27SDimitry Andric CurrNode->setConcatLen(CurrNodeLen); 10206c3fb27SDimitry Andric if (auto *InternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) 10306c3fb27SDimitry Andric for (auto &ChildPair : InternalNode->Children) { 1045ffd83dbSDimitry Andric assert(ChildPair.second && "Node had a null child!"); 1055ffd83dbSDimitry Andric ToVisit.push_back( 10606c3fb27SDimitry Andric {ChildPair.second, 10706c3fb27SDimitry Andric CurrNodeLen + numElementsInSubstring(ChildPair.second)}); 1085ffd83dbSDimitry Andric } 1095ffd83dbSDimitry Andric // No children, so we are at the end of the string. 11006c3fb27SDimitry Andric if (auto *LeafNode = dyn_cast<SuffixTreeLeafNode>(CurrNode)) 11106c3fb27SDimitry Andric LeafNode->setSuffixIdx(Str.size() - CurrNodeLen); 1125ffd83dbSDimitry Andric } 1135ffd83dbSDimitry Andric } 1145ffd83dbSDimitry Andric 115*0fca6ea1SDimitry Andric void SuffixTree::setLeafNodes() { 116*0fca6ea1SDimitry Andric // A stack that keeps track of nodes to visit for post-order DFS traversal. 
117*0fca6ea1SDimitry Andric SmallVector<SuffixTreeNode *> ToVisit; 118*0fca6ea1SDimitry Andric ToVisit.push_back(Root); 119*0fca6ea1SDimitry Andric 120*0fca6ea1SDimitry Andric // This keeps track of the index of the next leaf node to be added to 121*0fca6ea1SDimitry Andric // the LeafNodes vector of the suffix tree. 122*0fca6ea1SDimitry Andric unsigned LeafCounter = 0; 123*0fca6ea1SDimitry Andric 124*0fca6ea1SDimitry Andric // This keeps track of nodes whose children have been added to the stack. 125*0fca6ea1SDimitry Andric // The value is a pair, representing a node's first and last children. 126*0fca6ea1SDimitry Andric DenseMap<SuffixTreeInternalNode *, 127*0fca6ea1SDimitry Andric std::pair<SuffixTreeNode *, SuffixTreeNode *>> 128*0fca6ea1SDimitry Andric ChildrenMap; 129*0fca6ea1SDimitry Andric 130*0fca6ea1SDimitry Andric // Traverse the tree in post-order. 131*0fca6ea1SDimitry Andric while (!ToVisit.empty()) { 132*0fca6ea1SDimitry Andric SuffixTreeNode *CurrNode = ToVisit.pop_back_val(); 133*0fca6ea1SDimitry Andric if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) { 134*0fca6ea1SDimitry Andric // The current node is an internal node. 135*0fca6ea1SDimitry Andric auto I = ChildrenMap.find(CurrInternalNode); 136*0fca6ea1SDimitry Andric if (I == ChildrenMap.end()) { 137*0fca6ea1SDimitry Andric // This is the first time we visit this node. 138*0fca6ea1SDimitry Andric // Its children have not been added to the stack yet. 139*0fca6ea1SDimitry Andric // We add current node back, and add its children to the stack. 140*0fca6ea1SDimitry Andric // We keep track of the first and last children of the current node. 
141*0fca6ea1SDimitry Andric auto J = CurrInternalNode->Children.begin(); 142*0fca6ea1SDimitry Andric if (J != CurrInternalNode->Children.end()) { 143*0fca6ea1SDimitry Andric ToVisit.push_back(CurrNode); 144*0fca6ea1SDimitry Andric SuffixTreeNode *FirstChild = J->second; 145*0fca6ea1SDimitry Andric SuffixTreeNode *LastChild = nullptr; 146*0fca6ea1SDimitry Andric for (; J != CurrInternalNode->Children.end(); ++J) { 147*0fca6ea1SDimitry Andric LastChild = J->second; 148*0fca6ea1SDimitry Andric ToVisit.push_back(LastChild); 149*0fca6ea1SDimitry Andric } 150*0fca6ea1SDimitry Andric ChildrenMap[CurrInternalNode] = {FirstChild, LastChild}; 151*0fca6ea1SDimitry Andric } 152*0fca6ea1SDimitry Andric } else { 153*0fca6ea1SDimitry Andric // This is the second time we visit this node. 154*0fca6ea1SDimitry Andric // All of its children have already been processed. 155*0fca6ea1SDimitry Andric // Now, we can set its LeftLeafIdx and RightLeafIdx; 156*0fca6ea1SDimitry Andric auto [FirstChild, LastChild] = I->second; 157*0fca6ea1SDimitry Andric // Get the first child to use its RightLeafIdx. 158*0fca6ea1SDimitry Andric // The first child is the first one added to the stack, so it is 159*0fca6ea1SDimitry Andric // the last one to be processed. Hence, the leaf descendants 160*0fca6ea1SDimitry Andric // of the first child are assigned the largest index numbers. 161*0fca6ea1SDimitry Andric CurrNode->setRightLeafIdx(FirstChild->getRightLeafIdx()); 162*0fca6ea1SDimitry Andric // Get the last child to use its LeftLeafIdx. 163*0fca6ea1SDimitry Andric CurrNode->setLeftLeafIdx(LastChild->getLeftLeafIdx()); 164*0fca6ea1SDimitry Andric assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() && 165*0fca6ea1SDimitry Andric "LeftLeafIdx should not be larger than RightLeafIdx"); 166*0fca6ea1SDimitry Andric } 167*0fca6ea1SDimitry Andric } else { 168*0fca6ea1SDimitry Andric // The current node is a leaf node. 169*0fca6ea1SDimitry Andric // We can simply set its LeftLeafIdx and RightLeafIdx. 
170*0fca6ea1SDimitry Andric CurrNode->setLeftLeafIdx(LeafCounter); 171*0fca6ea1SDimitry Andric CurrNode->setRightLeafIdx(LeafCounter); 172*0fca6ea1SDimitry Andric ++LeafCounter; 173*0fca6ea1SDimitry Andric auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode); 174*0fca6ea1SDimitry Andric LeafNodes.push_back(CurrLeafNode); 175*0fca6ea1SDimitry Andric } 176*0fca6ea1SDimitry Andric } 177*0fca6ea1SDimitry Andric } 178*0fca6ea1SDimitry Andric 1795ffd83dbSDimitry Andric unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) { 18006c3fb27SDimitry Andric SuffixTreeInternalNode *NeedsLink = nullptr; 1815ffd83dbSDimitry Andric 1825ffd83dbSDimitry Andric while (SuffixesToAdd > 0) { 1835ffd83dbSDimitry Andric 1845ffd83dbSDimitry Andric // Are we waiting to add anything other than just the last character? 1855ffd83dbSDimitry Andric if (Active.Len == 0) { 1865ffd83dbSDimitry Andric // If not, then say the active index is the end index. 1875ffd83dbSDimitry Andric Active.Idx = EndIdx; 1885ffd83dbSDimitry Andric } 1895ffd83dbSDimitry Andric 1905ffd83dbSDimitry Andric assert(Active.Idx <= EndIdx && "Start index can't be after end index!"); 1915ffd83dbSDimitry Andric 1925ffd83dbSDimitry Andric // The first character in the current substring we're looking at. 1935ffd83dbSDimitry Andric unsigned FirstChar = Str[Active.Idx]; 1945ffd83dbSDimitry Andric 1955ffd83dbSDimitry Andric // Have we inserted anything starting with FirstChar at the current node? 1965ffd83dbSDimitry Andric if (Active.Node->Children.count(FirstChar) == 0) { 1975ffd83dbSDimitry Andric // If not, then we can just insert a leaf and move to the next step. 1985ffd83dbSDimitry Andric insertLeaf(*Active.Node, EndIdx, FirstChar); 1995ffd83dbSDimitry Andric 2005ffd83dbSDimitry Andric // The active node is an internal node, and we visited it, so it must 2015ffd83dbSDimitry Andric // need a link if it doesn't have one. 
2025ffd83dbSDimitry Andric if (NeedsLink) { 20306c3fb27SDimitry Andric NeedsLink->setLink(Active.Node); 2045ffd83dbSDimitry Andric NeedsLink = nullptr; 2055ffd83dbSDimitry Andric } 2065ffd83dbSDimitry Andric } else { 2075ffd83dbSDimitry Andric // There's a match with FirstChar, so look for the point in the tree to 2085ffd83dbSDimitry Andric // insert a new node. 2095ffd83dbSDimitry Andric SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; 2105ffd83dbSDimitry Andric 21106c3fb27SDimitry Andric unsigned SubstringLen = numElementsInSubstring(NextNode); 2125ffd83dbSDimitry Andric 2135ffd83dbSDimitry Andric // Is the current suffix we're trying to insert longer than the size of 2145ffd83dbSDimitry Andric // the child we want to move to? 2155ffd83dbSDimitry Andric if (Active.Len >= SubstringLen) { 2165ffd83dbSDimitry Andric // If yes, then consume the characters we've seen and move to the next 2175ffd83dbSDimitry Andric // node. 21806c3fb27SDimitry Andric assert(isa<SuffixTreeInternalNode>(NextNode) && 21906c3fb27SDimitry Andric "Expected an internal node?"); 2205ffd83dbSDimitry Andric Active.Idx += SubstringLen; 2215ffd83dbSDimitry Andric Active.Len -= SubstringLen; 22206c3fb27SDimitry Andric Active.Node = cast<SuffixTreeInternalNode>(NextNode); 2235ffd83dbSDimitry Andric continue; 2245ffd83dbSDimitry Andric } 2255ffd83dbSDimitry Andric 2265ffd83dbSDimitry Andric // Otherwise, the suffix we're trying to insert must be contained in the 2275ffd83dbSDimitry Andric // next node we want to move to. 2285ffd83dbSDimitry Andric unsigned LastChar = Str[EndIdx]; 2295ffd83dbSDimitry Andric 2305ffd83dbSDimitry Andric // Is the string we're trying to insert a substring of the next node? 23106c3fb27SDimitry Andric if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) { 2325ffd83dbSDimitry Andric // If yes, then we're done for this step. Remember our insertion point 2335ffd83dbSDimitry Andric // and move to the next end index. 
At this point, we have an implicit 2345ffd83dbSDimitry Andric // suffix tree. 2355ffd83dbSDimitry Andric if (NeedsLink && !Active.Node->isRoot()) { 23606c3fb27SDimitry Andric NeedsLink->setLink(Active.Node); 2375ffd83dbSDimitry Andric NeedsLink = nullptr; 2385ffd83dbSDimitry Andric } 2395ffd83dbSDimitry Andric 2405ffd83dbSDimitry Andric Active.Len++; 2415ffd83dbSDimitry Andric break; 2425ffd83dbSDimitry Andric } 2435ffd83dbSDimitry Andric 2445ffd83dbSDimitry Andric // The string we're trying to insert isn't a substring of the next node, 2455ffd83dbSDimitry Andric // but matches up to a point. Split the node. 2465ffd83dbSDimitry Andric // 2475ffd83dbSDimitry Andric // For example, say we ended our search at a node n and we're trying to 2485ffd83dbSDimitry Andric // insert ABD. Then we'll create a new node s for AB, reduce n to just 2495ffd83dbSDimitry Andric // representing C, and insert a new leaf node l to represent d. This 2505ffd83dbSDimitry Andric // allows us to ensure that if n was a leaf, it remains a leaf. 2515ffd83dbSDimitry Andric // 2525ffd83dbSDimitry Andric // | ABC ---split---> | AB 2535ffd83dbSDimitry Andric // n s 2545ffd83dbSDimitry Andric // C / \ D 2555ffd83dbSDimitry Andric // n l 2565ffd83dbSDimitry Andric 2575ffd83dbSDimitry Andric // The node s from the diagram 25806c3fb27SDimitry Andric SuffixTreeInternalNode *SplitNode = insertInternalNode( 25906c3fb27SDimitry Andric Active.Node, NextNode->getStartIdx(), 26006c3fb27SDimitry Andric NextNode->getStartIdx() + Active.Len - 1, FirstChar); 2615ffd83dbSDimitry Andric 2625ffd83dbSDimitry Andric // Insert the new node representing the new substring into the tree as 2635ffd83dbSDimitry Andric // a child of the split node. This is the node l from the diagram. 2645ffd83dbSDimitry Andric insertLeaf(*SplitNode, EndIdx, LastChar); 2655ffd83dbSDimitry Andric 2665ffd83dbSDimitry Andric // Make the old node a child of the split node and update its start 2675ffd83dbSDimitry Andric // index. 
This is the node n from the diagram. 26806c3fb27SDimitry Andric NextNode->incrementStartIdx(Active.Len); 26906c3fb27SDimitry Andric SplitNode->Children[Str[NextNode->getStartIdx()]] = NextNode; 2705ffd83dbSDimitry Andric 2715ffd83dbSDimitry Andric // SplitNode is an internal node, update the suffix link. 2725ffd83dbSDimitry Andric if (NeedsLink) 27306c3fb27SDimitry Andric NeedsLink->setLink(SplitNode); 2745ffd83dbSDimitry Andric 2755ffd83dbSDimitry Andric NeedsLink = SplitNode; 2765ffd83dbSDimitry Andric } 2775ffd83dbSDimitry Andric 2785ffd83dbSDimitry Andric // We've added something new to the tree, so there's one less suffix to 2795ffd83dbSDimitry Andric // add. 2805ffd83dbSDimitry Andric SuffixesToAdd--; 2815ffd83dbSDimitry Andric 2825ffd83dbSDimitry Andric if (Active.Node->isRoot()) { 2835ffd83dbSDimitry Andric if (Active.Len > 0) { 2845ffd83dbSDimitry Andric Active.Len--; 2855ffd83dbSDimitry Andric Active.Idx = EndIdx - SuffixesToAdd + 1; 2865ffd83dbSDimitry Andric } 2875ffd83dbSDimitry Andric } else { 2885ffd83dbSDimitry Andric // Start the next phase at the next smallest suffix. 28906c3fb27SDimitry Andric Active.Node = Active.Node->getLink(); 2905ffd83dbSDimitry Andric } 2915ffd83dbSDimitry Andric } 2925ffd83dbSDimitry Andric 2935ffd83dbSDimitry Andric return SuffixesToAdd; 2945ffd83dbSDimitry Andric } 29506c3fb27SDimitry Andric 29606c3fb27SDimitry Andric void SuffixTree::RepeatedSubstringIterator::advance() { 29706c3fb27SDimitry Andric // Clear the current state. If we're at the end of the range, then this 29806c3fb27SDimitry Andric // is the state we want to be in. 29906c3fb27SDimitry Andric RS = RepeatedSubstring(); 30006c3fb27SDimitry Andric N = nullptr; 30106c3fb27SDimitry Andric 30206c3fb27SDimitry Andric // Each leaf node represents a repeat of a string. 
30306c3fb27SDimitry Andric SmallVector<unsigned> RepeatedSubstringStarts; 30406c3fb27SDimitry Andric 30506c3fb27SDimitry Andric // Continue visiting nodes until we find one which repeats more than once. 30606c3fb27SDimitry Andric while (!InternalNodesToVisit.empty()) { 30706c3fb27SDimitry Andric RepeatedSubstringStarts.clear(); 30806c3fb27SDimitry Andric auto *Curr = InternalNodesToVisit.back(); 30906c3fb27SDimitry Andric InternalNodesToVisit.pop_back(); 31006c3fb27SDimitry Andric 31106c3fb27SDimitry Andric // Keep track of the length of the string associated with the node. If 31206c3fb27SDimitry Andric // it's too short, we'll quit. 31306c3fb27SDimitry Andric unsigned Length = Curr->getConcatLen(); 31406c3fb27SDimitry Andric 315*0fca6ea1SDimitry Andric // Iterate over each child, saving internal nodes for visiting. 316*0fca6ea1SDimitry Andric // Internal nodes represent individual strings, which may repeat. 317*0fca6ea1SDimitry Andric for (auto &ChildPair : Curr->Children) 31806c3fb27SDimitry Andric // Save all of this node's children for processing. 31906c3fb27SDimitry Andric if (auto *InternalChild = 320*0fca6ea1SDimitry Andric dyn_cast<SuffixTreeInternalNode>(ChildPair.second)) 32106c3fb27SDimitry Andric InternalNodesToVisit.push_back(InternalChild); 32206c3fb27SDimitry Andric 323*0fca6ea1SDimitry Andric // If length of repeated substring is below threshold, then skip it. 32406c3fb27SDimitry Andric if (Length < MinLength) 32506c3fb27SDimitry Andric continue; 32606c3fb27SDimitry Andric 32706c3fb27SDimitry Andric // The root never represents a repeated substring. If we're looking at 32806c3fb27SDimitry Andric // that, then skip it. 32906c3fb27SDimitry Andric if (Curr->isRoot()) 33006c3fb27SDimitry Andric continue; 33106c3fb27SDimitry Andric 332*0fca6ea1SDimitry Andric // Collect leaf children or leaf descendants by OutlinerLeafDescendants. 
333*0fca6ea1SDimitry Andric if (OutlinerLeafDescendants) { 334*0fca6ea1SDimitry Andric for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx(); 335*0fca6ea1SDimitry Andric ++I) 336*0fca6ea1SDimitry Andric RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx()); 337*0fca6ea1SDimitry Andric } else { 338*0fca6ea1SDimitry Andric for (auto &ChildPair : Curr->Children) 339*0fca6ea1SDimitry Andric if (auto *Leaf = dyn_cast<SuffixTreeLeafNode>(ChildPair.second)) 340*0fca6ea1SDimitry Andric RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx()); 341*0fca6ea1SDimitry Andric } 342*0fca6ea1SDimitry Andric 34306c3fb27SDimitry Andric // Do we have any repeated substrings? 34406c3fb27SDimitry Andric if (RepeatedSubstringStarts.size() < 2) 34506c3fb27SDimitry Andric continue; 34606c3fb27SDimitry Andric 34706c3fb27SDimitry Andric // Yes. Update the state to reflect this, and then bail out. 34806c3fb27SDimitry Andric N = Curr; 34906c3fb27SDimitry Andric RS.Length = Length; 35006c3fb27SDimitry Andric for (unsigned StartIdx : RepeatedSubstringStarts) 35106c3fb27SDimitry Andric RS.StartIndices.push_back(StartIdx); 35206c3fb27SDimitry Andric break; 35306c3fb27SDimitry Andric } 35406c3fb27SDimitry Andric // At this point, either NewRS is an empty RepeatedSubstring, or it was 35506c3fb27SDimitry Andric // set in the above loop. Similarly, N is either nullptr, or the node 35606c3fb27SDimitry Andric // associated with NewRS. 35706c3fb27SDimitry Andric } 358