xref: /llvm-project/llvm/lib/Support/SuffixTree.cpp (revision d9a00ed3668803d11675b103fe9b6ed077ddc4c1)
1bb677cacSAndrew Litteken //===- llvm/Support/SuffixTree.cpp - Implement Suffix Tree ------*- C++ -*-===//
2bb677cacSAndrew Litteken //
3bb677cacSAndrew Litteken // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4bb677cacSAndrew Litteken // See https://llvm.org/LICENSE.txt for license information.
5bb677cacSAndrew Litteken // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6bb677cacSAndrew Litteken //
7bb677cacSAndrew Litteken //===----------------------------------------------------------------------===//
8bb677cacSAndrew Litteken //
9bb677cacSAndrew Litteken // This file implements the Suffix Tree class.
10bb677cacSAndrew Litteken //
11bb677cacSAndrew Litteken //===----------------------------------------------------------------------===//
12bb677cacSAndrew Litteken 
13bb677cacSAndrew Litteken #include "llvm/Support/SuffixTree.h"
14bb677cacSAndrew Litteken #include "llvm/Support/Allocator.h"
15c2eeaf10SJessica Paquette #include "llvm/Support/Casting.h"
16c2eeaf10SJessica Paquette #include "llvm/Support/SuffixTreeNode.h"
17bb677cacSAndrew Litteken 
18bb677cacSAndrew Litteken using namespace llvm;
19bb677cacSAndrew Litteken 
20c2f0c204SJessica Paquette /// \returns the number of elements in the substring associated with \p N.
numElementsInSubstring(const SuffixTreeNode * N)21c2f0c204SJessica Paquette static size_t numElementsInSubstring(const SuffixTreeNode *N) {
22c2f0c204SJessica Paquette   assert(N && "Got a null node?");
23c2f0c204SJessica Paquette   if (auto *Internal = dyn_cast<SuffixTreeInternalNode>(N))
24c2f0c204SJessica Paquette     if (Internal->isRoot())
25c2f0c204SJessica Paquette       return 0;
26c2f0c204SJessica Paquette   return N->getEndIdx() - N->getStartIdx() + 1;
27c2f0c204SJessica Paquette }
28c2f0c204SJessica Paquette 
SuffixTree(const ArrayRef<unsigned> & Str,bool OutlinerLeafDescendants)29*d9a00ed3SXuan Zhang SuffixTree::SuffixTree(const ArrayRef<unsigned> &Str,
30*d9a00ed3SXuan Zhang                        bool OutlinerLeafDescendants)
31*d9a00ed3SXuan Zhang     : Str(Str), OutlinerLeafDescendants(OutlinerLeafDescendants) {
3266520c04SJessica Paquette   Root = insertRoot();
33bb677cacSAndrew Litteken   Active.Node = Root;
34bb677cacSAndrew Litteken 
35bb677cacSAndrew Litteken   // Keep track of the number of suffixes we have to add of the current
36bb677cacSAndrew Litteken   // prefix.
37bb677cacSAndrew Litteken   unsigned SuffixesToAdd = 0;
38bb677cacSAndrew Litteken 
39bb677cacSAndrew Litteken   // Construct the suffix tree iteratively on each prefix of the string.
40bb677cacSAndrew Litteken   // PfxEndIdx is the end index of the current prefix.
41bb677cacSAndrew Litteken   // End is one past the last element in the string.
42bb677cacSAndrew Litteken   for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) {
43bb677cacSAndrew Litteken     SuffixesToAdd++;
44bb677cacSAndrew Litteken     LeafEndIdx = PfxEndIdx; // Extend each of the leaves.
45bb677cacSAndrew Litteken     SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd);
46bb677cacSAndrew Litteken   }
47bb677cacSAndrew Litteken 
48bb677cacSAndrew Litteken   // Set the suffix indices of each leaf.
49bb677cacSAndrew Litteken   assert(Root && "Root node can't be nullptr!");
50bb677cacSAndrew Litteken   setSuffixIndices();
51*d9a00ed3SXuan Zhang 
52*d9a00ed3SXuan Zhang   // Collect all leaf nodes of the suffix tree. And for each internal node,
53*d9a00ed3SXuan Zhang   // record the range of leaf nodes that are descendants of it.
54*d9a00ed3SXuan Zhang   if (OutlinerLeafDescendants)
55*d9a00ed3SXuan Zhang     setLeafNodes();
56bb677cacSAndrew Litteken }
57bb677cacSAndrew Litteken 
insertLeaf(SuffixTreeInternalNode & Parent,unsigned StartIdx,unsigned Edge)58c2f0c204SJessica Paquette SuffixTreeNode *SuffixTree::insertLeaf(SuffixTreeInternalNode &Parent,
59bb677cacSAndrew Litteken                                        unsigned StartIdx, unsigned Edge) {
60bb677cacSAndrew Litteken   assert(StartIdx <= LeafEndIdx && "String can't start after it ends!");
61c2f0c204SJessica Paquette   auto *N = new (LeafNodeAllocator.Allocate())
62c2f0c204SJessica Paquette       SuffixTreeLeafNode(StartIdx, &LeafEndIdx);
63bb677cacSAndrew Litteken   Parent.Children[Edge] = N;
64bb677cacSAndrew Litteken   return N;
65bb677cacSAndrew Litteken }
66bb677cacSAndrew Litteken 
67c2f0c204SJessica Paquette SuffixTreeInternalNode *
insertInternalNode(SuffixTreeInternalNode * Parent,unsigned StartIdx,unsigned EndIdx,unsigned Edge)68c2f0c204SJessica Paquette SuffixTree::insertInternalNode(SuffixTreeInternalNode *Parent,
69c2f0c204SJessica Paquette                                unsigned StartIdx, unsigned EndIdx,
70c2f0c204SJessica Paquette                                unsigned Edge) {
71bb677cacSAndrew Litteken   assert(StartIdx <= EndIdx && "String can't start after it ends!");
7266520c04SJessica Paquette   assert(!(!Parent && StartIdx != SuffixTreeNode::EmptyIdx) &&
73bb677cacSAndrew Litteken          "Non-root internal nodes must have parents!");
74c2f0c204SJessica Paquette   auto *N = new (InternalNodeAllocator.Allocate())
75c2f0c204SJessica Paquette       SuffixTreeInternalNode(StartIdx, EndIdx, Root);
76bb677cacSAndrew Litteken   if (Parent)
77bb677cacSAndrew Litteken     Parent->Children[Edge] = N;
78bb677cacSAndrew Litteken   return N;
79bb677cacSAndrew Litteken }
80bb677cacSAndrew Litteken 
insertRoot()8166520c04SJessica Paquette SuffixTreeInternalNode *SuffixTree::insertRoot() {
8266520c04SJessica Paquette   return insertInternalNode(/*Parent = */ nullptr, SuffixTreeNode::EmptyIdx,
8366520c04SJessica Paquette                             SuffixTreeNode::EmptyIdx, /*Edge = */ 0);
8466520c04SJessica Paquette }
8566520c04SJessica Paquette 
setSuffixIndices()86bb677cacSAndrew Litteken void SuffixTree::setSuffixIndices() {
87bb677cacSAndrew Litteken   // List of nodes we need to visit along with the current length of the
88bb677cacSAndrew Litteken   // string.
89ec37ebf5SJessica Paquette   SmallVector<std::pair<SuffixTreeNode *, unsigned>> ToVisit;
90bb677cacSAndrew Litteken 
91bb677cacSAndrew Litteken   // Current node being visited.
92bb677cacSAndrew Litteken   SuffixTreeNode *CurrNode = Root;
93bb677cacSAndrew Litteken 
94bb677cacSAndrew Litteken   // Sum of the lengths of the nodes down the path to the current one.
95bb677cacSAndrew Litteken   unsigned CurrNodeLen = 0;
96bb677cacSAndrew Litteken   ToVisit.push_back({CurrNode, CurrNodeLen});
97bb677cacSAndrew Litteken   while (!ToVisit.empty()) {
98bb677cacSAndrew Litteken     std::tie(CurrNode, CurrNodeLen) = ToVisit.back();
99bb677cacSAndrew Litteken     ToVisit.pop_back();
100c2f0c204SJessica Paquette     // Length of the current node from the root down to here.
101c2f0c204SJessica Paquette     CurrNode->setConcatLen(CurrNodeLen);
102c2f0c204SJessica Paquette     if (auto *InternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode))
103c2f0c204SJessica Paquette       for (auto &ChildPair : InternalNode->Children) {
104bb677cacSAndrew Litteken         assert(ChildPair.second && "Node had a null child!");
105bb677cacSAndrew Litteken         ToVisit.push_back(
106c2f0c204SJessica Paquette             {ChildPair.second,
107c2f0c204SJessica Paquette              CurrNodeLen + numElementsInSubstring(ChildPair.second)});
108bb677cacSAndrew Litteken       }
109bb677cacSAndrew Litteken     // No children, so we are at the end of the string.
110c2f0c204SJessica Paquette     if (auto *LeafNode = dyn_cast<SuffixTreeLeafNode>(CurrNode))
111c2f0c204SJessica Paquette       LeafNode->setSuffixIdx(Str.size() - CurrNodeLen);
112bb677cacSAndrew Litteken   }
113bb677cacSAndrew Litteken }
114bb677cacSAndrew Litteken 
setLeafNodes()115*d9a00ed3SXuan Zhang void SuffixTree::setLeafNodes() {
116*d9a00ed3SXuan Zhang   // A stack that keeps track of nodes to visit for post-order DFS traversal.
117*d9a00ed3SXuan Zhang   SmallVector<SuffixTreeNode *> ToVisit;
118*d9a00ed3SXuan Zhang   ToVisit.push_back(Root);
119*d9a00ed3SXuan Zhang 
120*d9a00ed3SXuan Zhang   // This keeps track of the index of the next leaf node to be added to
121*d9a00ed3SXuan Zhang   // the LeafNodes vector of the suffix tree.
122*d9a00ed3SXuan Zhang   unsigned LeafCounter = 0;
123*d9a00ed3SXuan Zhang 
124*d9a00ed3SXuan Zhang   // This keeps track of nodes whose children have been added to the stack.
125*d9a00ed3SXuan Zhang   // The value is a pair, representing a node's first and last children.
126*d9a00ed3SXuan Zhang   DenseMap<SuffixTreeInternalNode *,
127*d9a00ed3SXuan Zhang            std::pair<SuffixTreeNode *, SuffixTreeNode *>>
128*d9a00ed3SXuan Zhang       ChildrenMap;
129*d9a00ed3SXuan Zhang 
130*d9a00ed3SXuan Zhang   // Traverse the tree in post-order.
131*d9a00ed3SXuan Zhang   while (!ToVisit.empty()) {
132*d9a00ed3SXuan Zhang     SuffixTreeNode *CurrNode = ToVisit.pop_back_val();
133*d9a00ed3SXuan Zhang     if (auto *CurrInternalNode = dyn_cast<SuffixTreeInternalNode>(CurrNode)) {
134*d9a00ed3SXuan Zhang       // The current node is an internal node.
135*d9a00ed3SXuan Zhang       auto I = ChildrenMap.find(CurrInternalNode);
136*d9a00ed3SXuan Zhang       if (I == ChildrenMap.end()) {
137*d9a00ed3SXuan Zhang         // This is the first time we visit this node.
138*d9a00ed3SXuan Zhang         // Its children have not been added to the stack yet.
139*d9a00ed3SXuan Zhang         // We add current node back, and add its children to the stack.
140*d9a00ed3SXuan Zhang         // We keep track of the first and last children of the current node.
141*d9a00ed3SXuan Zhang         auto J = CurrInternalNode->Children.begin();
142*d9a00ed3SXuan Zhang         if (J != CurrInternalNode->Children.end()) {
143*d9a00ed3SXuan Zhang           ToVisit.push_back(CurrNode);
144*d9a00ed3SXuan Zhang           SuffixTreeNode *FirstChild = J->second;
145*d9a00ed3SXuan Zhang           SuffixTreeNode *LastChild = nullptr;
146*d9a00ed3SXuan Zhang           for (; J != CurrInternalNode->Children.end(); ++J) {
147*d9a00ed3SXuan Zhang             LastChild = J->second;
148*d9a00ed3SXuan Zhang             ToVisit.push_back(LastChild);
149*d9a00ed3SXuan Zhang           }
150*d9a00ed3SXuan Zhang           ChildrenMap[CurrInternalNode] = {FirstChild, LastChild};
151*d9a00ed3SXuan Zhang         }
152*d9a00ed3SXuan Zhang       } else {
153*d9a00ed3SXuan Zhang         // This is the second time we visit this node.
154*d9a00ed3SXuan Zhang         // All of its children have already been processed.
155*d9a00ed3SXuan Zhang         // Now, we can set its LeftLeafIdx and RightLeafIdx;
156*d9a00ed3SXuan Zhang         auto [FirstChild, LastChild] = I->second;
157*d9a00ed3SXuan Zhang         // Get the first child to use its RightLeafIdx.
158*d9a00ed3SXuan Zhang         // The first child is the first one added to the stack, so it is
159*d9a00ed3SXuan Zhang         // the last one to be processed. Hence, the leaf descendants
160*d9a00ed3SXuan Zhang         // of the first child are assigned the largest index numbers.
161*d9a00ed3SXuan Zhang         CurrNode->setRightLeafIdx(FirstChild->getRightLeafIdx());
162*d9a00ed3SXuan Zhang         // Get the last child to use its LeftLeafIdx.
163*d9a00ed3SXuan Zhang         CurrNode->setLeftLeafIdx(LastChild->getLeftLeafIdx());
164*d9a00ed3SXuan Zhang         assert(CurrNode->getLeftLeafIdx() <= CurrNode->getRightLeafIdx() &&
165*d9a00ed3SXuan Zhang                "LeftLeafIdx should not be larger than RightLeafIdx");
166*d9a00ed3SXuan Zhang       }
167*d9a00ed3SXuan Zhang     } else {
168*d9a00ed3SXuan Zhang       // The current node is a leaf node.
169*d9a00ed3SXuan Zhang       // We can simply set its LeftLeafIdx and RightLeafIdx.
170*d9a00ed3SXuan Zhang       CurrNode->setLeftLeafIdx(LeafCounter);
171*d9a00ed3SXuan Zhang       CurrNode->setRightLeafIdx(LeafCounter);
172*d9a00ed3SXuan Zhang       ++LeafCounter;
173*d9a00ed3SXuan Zhang       auto *CurrLeafNode = cast<SuffixTreeLeafNode>(CurrNode);
174*d9a00ed3SXuan Zhang       LeafNodes.push_back(CurrLeafNode);
175*d9a00ed3SXuan Zhang     }
176*d9a00ed3SXuan Zhang   }
177*d9a00ed3SXuan Zhang }
178*d9a00ed3SXuan Zhang 
extend(unsigned EndIdx,unsigned SuffixesToAdd)179bb677cacSAndrew Litteken unsigned SuffixTree::extend(unsigned EndIdx, unsigned SuffixesToAdd) {
180c2f0c204SJessica Paquette   SuffixTreeInternalNode *NeedsLink = nullptr;
181bb677cacSAndrew Litteken 
182bb677cacSAndrew Litteken   while (SuffixesToAdd > 0) {
183bb677cacSAndrew Litteken 
184bb677cacSAndrew Litteken     // Are we waiting to add anything other than just the last character?
185bb677cacSAndrew Litteken     if (Active.Len == 0) {
186bb677cacSAndrew Litteken       // If not, then say the active index is the end index.
187bb677cacSAndrew Litteken       Active.Idx = EndIdx;
188bb677cacSAndrew Litteken     }
189bb677cacSAndrew Litteken 
190bb677cacSAndrew Litteken     assert(Active.Idx <= EndIdx && "Start index can't be after end index!");
191bb677cacSAndrew Litteken 
192bb677cacSAndrew Litteken     // The first character in the current substring we're looking at.
193bb677cacSAndrew Litteken     unsigned FirstChar = Str[Active.Idx];
194bb677cacSAndrew Litteken 
195bb677cacSAndrew Litteken     // Have we inserted anything starting with FirstChar at the current node?
196bb677cacSAndrew Litteken     if (Active.Node->Children.count(FirstChar) == 0) {
197bb677cacSAndrew Litteken       // If not, then we can just insert a leaf and move to the next step.
198bb677cacSAndrew Litteken       insertLeaf(*Active.Node, EndIdx, FirstChar);
199bb677cacSAndrew Litteken 
200bb677cacSAndrew Litteken       // The active node is an internal node, and we visited it, so it must
201bb677cacSAndrew Litteken       // need a link if it doesn't have one.
202bb677cacSAndrew Litteken       if (NeedsLink) {
203c2f0c204SJessica Paquette         NeedsLink->setLink(Active.Node);
204bb677cacSAndrew Litteken         NeedsLink = nullptr;
205bb677cacSAndrew Litteken       }
206bb677cacSAndrew Litteken     } else {
207bb677cacSAndrew Litteken       // There's a match with FirstChar, so look for the point in the tree to
208bb677cacSAndrew Litteken       // insert a new node.
209bb677cacSAndrew Litteken       SuffixTreeNode *NextNode = Active.Node->Children[FirstChar];
210bb677cacSAndrew Litteken 
211c2f0c204SJessica Paquette       unsigned SubstringLen = numElementsInSubstring(NextNode);
212bb677cacSAndrew Litteken 
213bb677cacSAndrew Litteken       // Is the current suffix we're trying to insert longer than the size of
214bb677cacSAndrew Litteken       // the child we want to move to?
215bb677cacSAndrew Litteken       if (Active.Len >= SubstringLen) {
216bb677cacSAndrew Litteken         // If yes, then consume the characters we've seen and move to the next
217bb677cacSAndrew Litteken         // node.
218c2f0c204SJessica Paquette         assert(isa<SuffixTreeInternalNode>(NextNode) &&
219c2f0c204SJessica Paquette                "Expected an internal node?");
220bb677cacSAndrew Litteken         Active.Idx += SubstringLen;
221bb677cacSAndrew Litteken         Active.Len -= SubstringLen;
222c2f0c204SJessica Paquette         Active.Node = cast<SuffixTreeInternalNode>(NextNode);
223bb677cacSAndrew Litteken         continue;
224bb677cacSAndrew Litteken       }
225bb677cacSAndrew Litteken 
226bb677cacSAndrew Litteken       // Otherwise, the suffix we're trying to insert must be contained in the
227bb677cacSAndrew Litteken       // next node we want to move to.
228bb677cacSAndrew Litteken       unsigned LastChar = Str[EndIdx];
229bb677cacSAndrew Litteken 
230bb677cacSAndrew Litteken       // Is the string we're trying to insert a substring of the next node?
231c2f0c204SJessica Paquette       if (Str[NextNode->getStartIdx() + Active.Len] == LastChar) {
232bb677cacSAndrew Litteken         // If yes, then we're done for this step. Remember our insertion point
233bb677cacSAndrew Litteken         // and move to the next end index. At this point, we have an implicit
234bb677cacSAndrew Litteken         // suffix tree.
235bb677cacSAndrew Litteken         if (NeedsLink && !Active.Node->isRoot()) {
236c2f0c204SJessica Paquette           NeedsLink->setLink(Active.Node);
237bb677cacSAndrew Litteken           NeedsLink = nullptr;
238bb677cacSAndrew Litteken         }
239bb677cacSAndrew Litteken 
240bb677cacSAndrew Litteken         Active.Len++;
241bb677cacSAndrew Litteken         break;
242bb677cacSAndrew Litteken       }
243bb677cacSAndrew Litteken 
244bb677cacSAndrew Litteken       // The string we're trying to insert isn't a substring of the next node,
245bb677cacSAndrew Litteken       // but matches up to a point. Split the node.
246bb677cacSAndrew Litteken       //
247bb677cacSAndrew Litteken       // For example, say we ended our search at a node n and we're trying to
248bb677cacSAndrew Litteken       // insert ABD. Then we'll create a new node s for AB, reduce n to just
249bb677cacSAndrew Litteken       // representing C, and insert a new leaf node l to represent d. This
250bb677cacSAndrew Litteken       // allows us to ensure that if n was a leaf, it remains a leaf.
251bb677cacSAndrew Litteken       //
252bb677cacSAndrew Litteken       //   | ABC  ---split--->  | AB
253bb677cacSAndrew Litteken       //   n                    s
254bb677cacSAndrew Litteken       //                     C / \ D
255bb677cacSAndrew Litteken       //                      n   l
256bb677cacSAndrew Litteken 
257bb677cacSAndrew Litteken       // The node s from the diagram
258c2eeaf10SJessica Paquette       SuffixTreeInternalNode *SplitNode = insertInternalNode(
259c2eeaf10SJessica Paquette           Active.Node, NextNode->getStartIdx(),
260c2f0c204SJessica Paquette           NextNode->getStartIdx() + Active.Len - 1, FirstChar);
261bb677cacSAndrew Litteken 
262bb677cacSAndrew Litteken       // Insert the new node representing the new substring into the tree as
263bb677cacSAndrew Litteken       // a child of the split node. This is the node l from the diagram.
264bb677cacSAndrew Litteken       insertLeaf(*SplitNode, EndIdx, LastChar);
265bb677cacSAndrew Litteken 
266bb677cacSAndrew Litteken       // Make the old node a child of the split node and update its start
267bb677cacSAndrew Litteken       // index. This is the node n from the diagram.
268c2f0c204SJessica Paquette       NextNode->incrementStartIdx(Active.Len);
269c2f0c204SJessica Paquette       SplitNode->Children[Str[NextNode->getStartIdx()]] = NextNode;
270bb677cacSAndrew Litteken 
271bb677cacSAndrew Litteken       // SplitNode is an internal node, update the suffix link.
272bb677cacSAndrew Litteken       if (NeedsLink)
273c2f0c204SJessica Paquette         NeedsLink->setLink(SplitNode);
274bb677cacSAndrew Litteken 
275bb677cacSAndrew Litteken       NeedsLink = SplitNode;
276bb677cacSAndrew Litteken     }
277bb677cacSAndrew Litteken 
278bb677cacSAndrew Litteken     // We've added something new to the tree, so there's one less suffix to
279bb677cacSAndrew Litteken     // add.
280bb677cacSAndrew Litteken     SuffixesToAdd--;
281bb677cacSAndrew Litteken 
282bb677cacSAndrew Litteken     if (Active.Node->isRoot()) {
283bb677cacSAndrew Litteken       if (Active.Len > 0) {
284bb677cacSAndrew Litteken         Active.Len--;
285bb677cacSAndrew Litteken         Active.Idx = EndIdx - SuffixesToAdd + 1;
286bb677cacSAndrew Litteken       }
287bb677cacSAndrew Litteken     } else {
288bb677cacSAndrew Litteken       // Start the next phase at the next smallest suffix.
289c2f0c204SJessica Paquette       Active.Node = Active.Node->getLink();
290bb677cacSAndrew Litteken     }
291bb677cacSAndrew Litteken   }
292bb677cacSAndrew Litteken 
293bb677cacSAndrew Litteken   return SuffixesToAdd;
294bb677cacSAndrew Litteken }
295c2eeaf10SJessica Paquette 
advance()296c2eeaf10SJessica Paquette void SuffixTree::RepeatedSubstringIterator::advance() {
297c2eeaf10SJessica Paquette   // Clear the current state. If we're at the end of the range, then this
298c2eeaf10SJessica Paquette   // is the state we want to be in.
299c2eeaf10SJessica Paquette   RS = RepeatedSubstring();
300c2eeaf10SJessica Paquette   N = nullptr;
301c2eeaf10SJessica Paquette 
302c2eeaf10SJessica Paquette   // Each leaf node represents a repeat of a string.
303c2eeaf10SJessica Paquette   SmallVector<unsigned> RepeatedSubstringStarts;
304c2eeaf10SJessica Paquette 
305c2eeaf10SJessica Paquette   // Continue visiting nodes until we find one which repeats more than once.
306c2eeaf10SJessica Paquette   while (!InternalNodesToVisit.empty()) {
307c2eeaf10SJessica Paquette     RepeatedSubstringStarts.clear();
308c2eeaf10SJessica Paquette     auto *Curr = InternalNodesToVisit.back();
309c2eeaf10SJessica Paquette     InternalNodesToVisit.pop_back();
310c2eeaf10SJessica Paquette 
311c2eeaf10SJessica Paquette     // Keep track of the length of the string associated with the node. If
312c2eeaf10SJessica Paquette     // it's too short, we'll quit.
313c2eeaf10SJessica Paquette     unsigned Length = Curr->getConcatLen();
314c2eeaf10SJessica Paquette 
315*d9a00ed3SXuan Zhang     // Iterate over each child, saving internal nodes for visiting.
316*d9a00ed3SXuan Zhang     // Internal nodes represent individual strings, which may repeat.
317*d9a00ed3SXuan Zhang     for (auto &ChildPair : Curr->Children)
318c2eeaf10SJessica Paquette       // Save all of this node's children for processing.
319c2eeaf10SJessica Paquette       if (auto *InternalChild =
320*d9a00ed3SXuan Zhang               dyn_cast<SuffixTreeInternalNode>(ChildPair.second))
321c2eeaf10SJessica Paquette         InternalNodesToVisit.push_back(InternalChild);
322c2eeaf10SJessica Paquette 
323*d9a00ed3SXuan Zhang     // If length of repeated substring is below threshold, then skip it.
324c2eeaf10SJessica Paquette     if (Length < MinLength)
325c2eeaf10SJessica Paquette       continue;
326c2eeaf10SJessica Paquette 
327c2eeaf10SJessica Paquette     // The root never represents a repeated substring. If we're looking at
328c2eeaf10SJessica Paquette     // that, then skip it.
329c2eeaf10SJessica Paquette     if (Curr->isRoot())
330c2eeaf10SJessica Paquette       continue;
331c2eeaf10SJessica Paquette 
332*d9a00ed3SXuan Zhang     // Collect leaf children or leaf descendants by OutlinerLeafDescendants.
333*d9a00ed3SXuan Zhang     if (OutlinerLeafDescendants) {
334*d9a00ed3SXuan Zhang       for (unsigned I = Curr->getLeftLeafIdx(); I <= Curr->getRightLeafIdx();
335*d9a00ed3SXuan Zhang            ++I)
336*d9a00ed3SXuan Zhang         RepeatedSubstringStarts.push_back(LeafNodes[I]->getSuffixIdx());
337*d9a00ed3SXuan Zhang     } else {
338*d9a00ed3SXuan Zhang       for (auto &ChildPair : Curr->Children)
339*d9a00ed3SXuan Zhang         if (auto *Leaf = dyn_cast<SuffixTreeLeafNode>(ChildPair.second))
340*d9a00ed3SXuan Zhang           RepeatedSubstringStarts.push_back(Leaf->getSuffixIdx());
341*d9a00ed3SXuan Zhang     }
342*d9a00ed3SXuan Zhang 
343c2eeaf10SJessica Paquette     // Do we have any repeated substrings?
344c2eeaf10SJessica Paquette     if (RepeatedSubstringStarts.size() < 2)
345c2eeaf10SJessica Paquette       continue;
346c2eeaf10SJessica Paquette 
347c2eeaf10SJessica Paquette     // Yes. Update the state to reflect this, and then bail out.
348c2eeaf10SJessica Paquette     N = Curr;
349c2eeaf10SJessica Paquette     RS.Length = Length;
350c2eeaf10SJessica Paquette     for (unsigned StartIdx : RepeatedSubstringStarts)
351c2eeaf10SJessica Paquette       RS.StartIndices.push_back(StartIdx);
352c2eeaf10SJessica Paquette     break;
353c2eeaf10SJessica Paquette   }
354c2eeaf10SJessica Paquette   // At this point, either NewRS is an empty RepeatedSubstring, or it was
355c2eeaf10SJessica Paquette   // set in the above loop. Similarly, N is either nullptr, or the node
356c2eeaf10SJessica Paquette   // associated with NewRS.
357c2eeaf10SJessica Paquette }
358