xref: /llvm-project/llvm/lib/Support/DeltaTree.cpp (revision 0d150db214e2aa13a825b563c7238e1243d61db1)
1 //===- DeltaTree.cpp - B-Tree for Rewrite Delta tracking ------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the DeltaTree and related classes.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/ADT/DeltaTree.h"
14 #include "llvm/Support/Casting.h"
15 #include <cassert>
16 #include <cstring>
17 
18 using namespace llvm;
19 
20 /// The DeltaTree class is a multiway search tree (BTree) structure with some
21 /// fancy features.  B-Trees are generally more memory and cache efficient
22 /// than binary trees, because they store multiple keys/values in each node.
23 ///
24 /// DeltaTree implements a key/value mapping from FileIndex to Delta, allowing
25 /// fast lookup by FileIndex.  However, an added (important) bonus is that it
26 /// can also efficiently tell us the full accumulated delta for a specific
27 /// file offset as well, without traversing the whole tree.
28 ///
29 /// The nodes of the tree are made up of instances of two classes:
30 /// DeltaTreeNode and DeltaTreeInteriorNode.  The later subclasses the
31 /// former and adds children pointers.  Each node knows the full delta of all
32 /// entries (recursively) contained inside of it, which allows us to get the
33 /// full delta implied by a whole subtree in constant time.
34 
35 namespace {
36 
37 /// SourceDelta - As code in the original input buffer is added and deleted,
38 /// SourceDelta records are used to keep track of how the input SourceLocation
39 /// object is mapped into the output buffer.
40 struct SourceDelta {
41   unsigned FileLoc;
42   int Delta;
43 
44   static SourceDelta get(unsigned Loc, int D) {
45     SourceDelta Delta;
46     Delta.FileLoc = Loc;
47     Delta.Delta = D;
48     return Delta;
49   }
50 };
51 
52 /// DeltaTreeNode - The common part of all nodes.
53 ///
54 class DeltaTreeNode {
55 public:
56   struct InsertResult {
57     DeltaTreeNode *LHS, *RHS;
58     SourceDelta Split;
59   };
60 
61 private:
62   friend class DeltaTreeInteriorNode;
63 
64   /// WidthFactor - This controls the number of K/V slots held in the BTree:
65   /// how wide it is.  Each level of the BTree is guaranteed to have at least
66   /// WidthFactor-1 K/V pairs (except the root) and may have at most
67   /// 2*WidthFactor-1 K/V pairs.
68   enum { WidthFactor = 8 };
69 
70   /// Values - This tracks the SourceDelta's currently in this node.
71   SourceDelta Values[2 * WidthFactor - 1];
72 
73   /// NumValuesUsed - This tracks the number of values this node currently
74   /// holds.
75   unsigned char NumValuesUsed = 0;
76 
77   /// IsLeaf - This is true if this is a leaf of the btree.  If false, this is
78   /// an interior node, and is actually an instance of DeltaTreeInteriorNode.
79   bool IsLeaf;
80 
81   /// FullDelta - This is the full delta of all the values in this node and
82   /// all children nodes.
83   int FullDelta = 0;
84 
85 public:
86   DeltaTreeNode(bool isLeaf = true) : IsLeaf(isLeaf) {}
87 
88   bool isLeaf() const { return IsLeaf; }
89   int getFullDelta() const { return FullDelta; }
90   bool isFull() const { return NumValuesUsed == 2 * WidthFactor - 1; }
91 
92   unsigned getNumValuesUsed() const { return NumValuesUsed; }
93 
94   const SourceDelta &getValue(unsigned i) const {
95     assert(i < NumValuesUsed && "Invalid value #");
96     return Values[i];
97   }
98 
99   SourceDelta &getValue(unsigned i) {
100     assert(i < NumValuesUsed && "Invalid value #");
101     return Values[i];
102   }
103 
104   /// DoInsertion - Do an insertion of the specified FileIndex/Delta pair into
105   /// this node.  If insertion is easy, do it and return false.  Otherwise,
106   /// split the node, populate InsertRes with info about the split, and return
107   /// true.
108   bool DoInsertion(unsigned FileIndex, int Delta, InsertResult *InsertRes);
109 
110   void DoSplit(InsertResult &InsertRes);
111 
112   /// RecomputeFullDeltaLocally - Recompute the FullDelta field by doing a
113   /// local walk over our contained deltas.
114   void RecomputeFullDeltaLocally();
115 
116   void Destroy();
117 };
118 
119 /// DeltaTreeInteriorNode - When isLeaf = false, a node has child pointers.
120 /// This class tracks them.
121 class DeltaTreeInteriorNode : public DeltaTreeNode {
122   friend class DeltaTreeNode;
123 
124   DeltaTreeNode *Children[2 * WidthFactor];
125 
126   ~DeltaTreeInteriorNode() {
127     for (unsigned i = 0, e = NumValuesUsed + 1; i != e; ++i)
128       Children[i]->Destroy();
129   }
130 
131 public:
132   DeltaTreeInteriorNode() : DeltaTreeNode(false /*nonleaf*/) {}
133 
134   DeltaTreeInteriorNode(const InsertResult &IR)
135       : DeltaTreeNode(false /*nonleaf*/) {
136     Children[0] = IR.LHS;
137     Children[1] = IR.RHS;
138     Values[0] = IR.Split;
139     FullDelta =
140         IR.LHS->getFullDelta() + IR.RHS->getFullDelta() + IR.Split.Delta;
141     NumValuesUsed = 1;
142   }
143 
144   const DeltaTreeNode *getChild(unsigned i) const {
145     assert(i < getNumValuesUsed() + 1 && "Invalid child");
146     return Children[i];
147   }
148 
149   DeltaTreeNode *getChild(unsigned i) {
150     assert(i < getNumValuesUsed() + 1 && "Invalid child");
151     return Children[i];
152   }
153 
154   static bool classof(const DeltaTreeNode *N) { return !N->isLeaf(); }
155 };
156 
157 } // namespace
158 
159 /// Destroy - A 'virtual' destructor.
160 void DeltaTreeNode::Destroy() {
161   if (isLeaf())
162     delete this;
163   else
164     delete cast<DeltaTreeInteriorNode>(this);
165 }
166 
167 /// RecomputeFullDeltaLocally - Recompute the FullDelta field by doing a
168 /// local walk over our contained deltas.
169 void DeltaTreeNode::RecomputeFullDeltaLocally() {
170   int NewFullDelta = 0;
171   for (unsigned i = 0, e = getNumValuesUsed(); i != e; ++i)
172     NewFullDelta += Values[i].Delta;
173   if (auto *IN = dyn_cast<DeltaTreeInteriorNode>(this))
174     for (unsigned i = 0, e = getNumValuesUsed() + 1; i != e; ++i)
175       NewFullDelta += IN->getChild(i)->getFullDelta();
176   FullDelta = NewFullDelta;
177 }
178 
179 /// DoInsertion - Do an insertion of the specified FileIndex/Delta pair into
180 /// this node.  If insertion is easy, do it and return false.  Otherwise,
181 /// split the node, populate InsertRes with info about the split, and return
182 /// true.
183 bool DeltaTreeNode::DoInsertion(unsigned FileIndex, int Delta,
184                                 InsertResult *InsertRes) {
185   // Maintain full delta for this node.
186   FullDelta += Delta;
187 
188   // Find the insertion point, the first delta whose index is >= FileIndex.
189   unsigned i = 0, e = getNumValuesUsed();
190   while (i != e && FileIndex > getValue(i).FileLoc)
191     ++i;
192 
193   // If we found an a record for exactly this file index, just merge this
194   // value into the pre-existing record and finish early.
195   if (i != e && getValue(i).FileLoc == FileIndex) {
196     // NOTE: Delta could drop to zero here.  This means that the delta entry is
197     // useless and could be removed.  Supporting erases is more complex than
198     // leaving an entry with Delta=0, so we just leave an entry with Delta=0 in
199     // the tree.
200     Values[i].Delta += Delta;
201     return false;
202   }
203 
204   // Otherwise, we found an insertion point, and we know that the value at the
205   // specified index is > FileIndex.  Handle the leaf case first.
206   if (isLeaf()) {
207     if (!isFull()) {
208       // For an insertion into a non-full leaf node, just insert the value in
209       // its sorted position.  This requires moving later values over.
210       if (i != e)
211         memmove(&Values[i + 1], &Values[i], sizeof(Values[0]) * (e - i));
212       Values[i] = SourceDelta::get(FileIndex, Delta);
213       ++NumValuesUsed;
214       return false;
215     }
216 
217     // Otherwise, if this is leaf is full, split the node at its median, insert
218     // the value into one of the children, and return the result.
219     assert(InsertRes && "No result location specified");
220     DoSplit(*InsertRes);
221 
222     if (InsertRes->Split.FileLoc > FileIndex)
223       InsertRes->LHS->DoInsertion(FileIndex, Delta, nullptr /*can't fail*/);
224     else
225       InsertRes->RHS->DoInsertion(FileIndex, Delta, nullptr /*can't fail*/);
226     return true;
227   }
228 
229   // Otherwise, this is an interior node.  Send the request down the tree.
230   auto *IN = cast<DeltaTreeInteriorNode>(this);
231   if (!IN->Children[i]->DoInsertion(FileIndex, Delta, InsertRes))
232     return false; // If there was space in the child, just return.
233 
234   // Okay, this split the subtree, producing a new value and two children to
235   // insert here.  If this node is non-full, we can just insert it directly.
236   if (!isFull()) {
237     // Now that we have two nodes and a new element, insert the perclated value
238     // into ourself by moving all the later values/children down, then inserting
239     // the new one.
240     if (i != e)
241       memmove(&IN->Children[i + 2], &IN->Children[i + 1],
242               (e - i) * sizeof(IN->Children[0]));
243     IN->Children[i] = InsertRes->LHS;
244     IN->Children[i + 1] = InsertRes->RHS;
245 
246     if (e != i)
247       memmove(&Values[i + 1], &Values[i], (e - i) * sizeof(Values[0]));
248     Values[i] = InsertRes->Split;
249     ++NumValuesUsed;
250     return false;
251   }
252 
253   // Finally, if this interior node was full and a node is percolated up, split
254   // ourself and return that up the chain.  Start by saving all our info to
255   // avoid having the split clobber it.
256   IN->Children[i] = InsertRes->LHS;
257   DeltaTreeNode *SubRHS = InsertRes->RHS;
258   SourceDelta SubSplit = InsertRes->Split;
259 
260   // Do the split.
261   DoSplit(*InsertRes);
262 
263   // Figure out where to insert SubRHS/NewSplit.
264   DeltaTreeInteriorNode *InsertSide;
265   if (SubSplit.FileLoc < InsertRes->Split.FileLoc)
266     InsertSide = cast<DeltaTreeInteriorNode>(InsertRes->LHS);
267   else
268     InsertSide = cast<DeltaTreeInteriorNode>(InsertRes->RHS);
269 
270   // We now have a non-empty interior node 'InsertSide' to insert
271   // SubRHS/SubSplit into.  Find out where to insert SubSplit.
272 
273   // Find the insertion point, the first delta whose index is >SubSplit.FileLoc.
274   i = 0;
275   e = InsertSide->getNumValuesUsed();
276   while (i != e && SubSplit.FileLoc > InsertSide->getValue(i).FileLoc)
277     ++i;
278 
279   // Now we know that i is the place to insert the split value into.  Insert it
280   // and the child right after it.
281   if (i != e)
282     memmove(&InsertSide->Children[i + 2], &InsertSide->Children[i + 1],
283             (e - i) * sizeof(IN->Children[0]));
284   InsertSide->Children[i + 1] = SubRHS;
285 
286   if (e != i)
287     memmove(&InsertSide->Values[i + 1], &InsertSide->Values[i],
288             (e - i) * sizeof(Values[0]));
289   InsertSide->Values[i] = SubSplit;
290   ++InsertSide->NumValuesUsed;
291   InsertSide->FullDelta += SubSplit.Delta + SubRHS->getFullDelta();
292   return true;
293 }
294 
295 /// DoSplit - Split the currently full node (which has 2*WidthFactor-1 values)
296 /// into two subtrees each with "WidthFactor-1" values and a pivot value.
297 /// Return the pieces in InsertRes.
298 void DeltaTreeNode::DoSplit(InsertResult &InsertRes) {
299   assert(isFull() && "Why split a non-full node?");
300 
301   // Since this node is full, it contains 2*WidthFactor-1 values.  We move
302   // the first 'WidthFactor-1' values to the LHS child (which we leave in this
303   // node), propagate one value up, and move the last 'WidthFactor-1' values
304   // into the RHS child.
305 
306   // Create the new child node.
307   DeltaTreeNode *NewNode;
308   if (auto *IN = dyn_cast<DeltaTreeInteriorNode>(this)) {
309     // If this is an interior node, also move over 'WidthFactor' children
310     // into the new node.
311     DeltaTreeInteriorNode *New = new DeltaTreeInteriorNode();
312     memcpy(&New->Children[0], &IN->Children[WidthFactor],
313            WidthFactor * sizeof(IN->Children[0]));
314     NewNode = New;
315   } else {
316     // Just create the new leaf node.
317     NewNode = new DeltaTreeNode();
318   }
319 
320   // Move over the last 'WidthFactor-1' values from here to NewNode.
321   memcpy(&NewNode->Values[0], &Values[WidthFactor],
322          (WidthFactor - 1) * sizeof(Values[0]));
323 
324   // Decrease the number of values in the two nodes.
325   NewNode->NumValuesUsed = NumValuesUsed = WidthFactor - 1;
326 
327   // Recompute the two nodes' full delta.
328   NewNode->RecomputeFullDeltaLocally();
329   RecomputeFullDeltaLocally();
330 
331   InsertRes.LHS = this;
332   InsertRes.RHS = NewNode;
333   InsertRes.Split = Values[WidthFactor - 1];
334 }
335 
336 //===----------------------------------------------------------------------===//
337 //                        DeltaTree Implementation
338 //===----------------------------------------------------------------------===//
339 
340 // #define VERIFY_TREE
341 
342 #ifdef VERIFY_TREE
343 /// VerifyTree - Walk the btree performing assertions on various properties to
344 /// verify consistency.  This is useful for debugging new changes to the tree.
345 static void VerifyTree(const DeltaTreeNode *N) {
346   const auto *IN = dyn_cast<DeltaTreeInteriorNode>(N);
347   if (IN == 0) {
348     // Verify leaves, just ensure that FullDelta matches up and the elements
349     // are in proper order.
350     int FullDelta = 0;
351     for (unsigned i = 0, e = N->getNumValuesUsed(); i != e; ++i) {
352       if (i)
353         assert(N->getValue(i - 1).FileLoc < N->getValue(i).FileLoc);
354       FullDelta += N->getValue(i).Delta;
355     }
356     assert(FullDelta == N->getFullDelta());
357     return;
358   }
359 
360   // Verify interior nodes: Ensure that FullDelta matches up and the
361   // elements are in proper order and the children are in proper order.
362   int FullDelta = 0;
363   for (unsigned i = 0, e = IN->getNumValuesUsed(); i != e; ++i) {
364     const SourceDelta &IVal = N->getValue(i);
365     const DeltaTreeNode *IChild = IN->getChild(i);
366     if (i)
367       assert(IN->getValue(i - 1).FileLoc < IVal.FileLoc);
368     FullDelta += IVal.Delta;
369     FullDelta += IChild->getFullDelta();
370 
371     // The largest value in child #i should be smaller than FileLoc.
372     assert(IChild->getValue(IChild->getNumValuesUsed() - 1).FileLoc <
373            IVal.FileLoc);
374 
375     // The smallest value in child #i+1 should be larger than FileLoc.
376     assert(IN->getChild(i + 1)->getValue(0).FileLoc > IVal.FileLoc);
377     VerifyTree(IChild);
378   }
379 
380   FullDelta += IN->getChild(IN->getNumValuesUsed())->getFullDelta();
381 
382   assert(FullDelta == N->getFullDelta());
383 }
384 #endif // VERIFY_TREE
385 
386 static DeltaTreeNode *getRoot(void *Root) { return (DeltaTreeNode *)Root; }
387 
388 DeltaTree::DeltaTree() { Root = new DeltaTreeNode(); }
389 
390 DeltaTree::DeltaTree(const DeltaTree &RHS) {
391   // Currently we only support copying when the RHS is empty.
392   assert(getRoot(RHS.Root)->getNumValuesUsed() == 0 &&
393          "Can only copy empty tree");
394   Root = new DeltaTreeNode();
395 }
396 
397 DeltaTree::~DeltaTree() { getRoot(Root)->Destroy(); }
398 
399 /// getDeltaAt - Return the accumulated delta at the specified file offset.
400 /// This includes all insertions or delections that occurred *before* the
401 /// specified file index.
402 int DeltaTree::getDeltaAt(unsigned FileIndex) const {
403   const DeltaTreeNode *Node = getRoot(Root);
404 
405   int Result = 0;
406 
407   // Walk down the tree.
408   while (true) {
409     // For all nodes, include any local deltas before the specified file
410     // index by summing them up directly.  Keep track of how many were
411     // included.
412     unsigned NumValsGreater = 0;
413     for (unsigned e = Node->getNumValuesUsed(); NumValsGreater != e;
414          ++NumValsGreater) {
415       const SourceDelta &Val = Node->getValue(NumValsGreater);
416 
417       if (Val.FileLoc >= FileIndex)
418         break;
419       Result += Val.Delta;
420     }
421 
422     // If we have an interior node, include information about children and
423     // recurse.  Otherwise, if we have a leaf, we're done.
424     const auto *IN = dyn_cast<DeltaTreeInteriorNode>(Node);
425     if (!IN)
426       return Result;
427 
428     // Include any children to the left of the values we skipped, all of
429     // their deltas should be included as well.
430     for (unsigned i = 0; i != NumValsGreater; ++i)
431       Result += IN->getChild(i)->getFullDelta();
432 
433     // If we found exactly the value we were looking for, break off the
434     // search early.  There is no need to search the RHS of the value for
435     // partial results.
436     if (NumValsGreater != Node->getNumValuesUsed() &&
437         Node->getValue(NumValsGreater).FileLoc == FileIndex)
438       return Result + IN->getChild(NumValsGreater)->getFullDelta();
439 
440     // Otherwise, traverse down the tree.  The selected subtree may be
441     // partially included in the range.
442     Node = IN->getChild(NumValsGreater);
443   }
444   // NOT REACHED.
445 }
446 
447 /// AddDelta - When a change is made that shifts around the text buffer,
448 /// this method is used to record that info.  It inserts a delta of 'Delta'
449 /// into the current DeltaTree at offset FileIndex.
450 void DeltaTree::AddDelta(unsigned FileIndex, int Delta) {
451   assert(Delta && "Adding a noop?");
452   DeltaTreeNode *MyRoot = getRoot(Root);
453 
454   DeltaTreeNode::InsertResult InsertRes;
455   if (MyRoot->DoInsertion(FileIndex, Delta, &InsertRes)) {
456     Root = new DeltaTreeInteriorNode(InsertRes);
457 #ifdef VERIFY_TREE
458     MyRoot = Root;
459 #endif
460   }
461 
462 #ifdef VERIFY_TREE
463   VerifyTree(MyRoot);
464 #endif
465 }
466