1 //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // We wish to encode the SourceLocation from other module file not dependent 10 // on the other module file. So that the source location changes from other 11 // module file may not affect the contents of the current module file. Then the 12 // users don't need to recompile the whole project due to a new line in a module 13 // unit in the root of the dependency graph. 14 // 15 // To achieve this, we need to encode the index of the module file into the 16 // encoding of the source location. The encoding of the source location may be: 17 // 18 // |-----------------------|-----------------------| 19 // | A | B | C | 20 // 21 // * A: 32 bit. The index of the module file in the module manager + 1. The +1 22 // here is necessary since we wish 0 stands for the current module file. 23 // * B: 31 bit. The offset of the source location to the module file containing 24 // it. 25 // * C: The macro bit. We rotate it to the lowest bit so that we can save some 26 // space in case the index of the module file is 0. 27 // 28 // Specially, if the index of the module file is 0, we allow to encode a 29 // sequence of locations we store only differences between successive elements. 30 // 31 //===----------------------------------------------------------------------===// 32 33 #include "clang/Basic/SourceLocation.h" 34 #include "llvm/Support/MathExtras.h" 35 #include <climits> 36 37 #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H 38 #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H 39 40 namespace clang { 41 class SourceLocationSequence; 42 43 /// Serialized encoding of SourceLocations without context. 44 /// Optimized to have small unsigned values (=> small after VBR encoding). 45 /// 46 // Macro locations have the top bit set, we rotate by one so it is the low bit. 47 class SourceLocationEncoding { 48 using UIntTy = SourceLocation::UIntTy; 49 constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy); 50 51 static UIntTy encodeRaw(UIntTy Raw) { 52 return (Raw << 1) | (Raw >> (UIntBits - 1)); 53 } 54 static UIntTy decodeRaw(UIntTy Raw) { 55 return (Raw >> 1) | (Raw << (UIntBits - 1)); 56 } 57 friend SourceLocationSequence; 58 59 public: 60 using RawLocEncoding = uint64_t; 61 62 static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset, 63 unsigned BaseModuleFileIndex, 64 SourceLocationSequence * = nullptr); 65 static std::pair<SourceLocation, unsigned> 66 decode(RawLocEncoding, SourceLocationSequence * = nullptr); 67 }; 68 69 /// Serialized encoding of a sequence of SourceLocations. 70 /// 71 /// Optimized to produce small values when locations with the sequence are 72 /// similar. Each element can be delta-encoded against the last nonzero element. 73 /// 74 /// Sequences should be started by creating a SourceLocationSequence::State, 75 /// and then passed around as SourceLocationSequence*. Example: 76 /// 77 /// // establishes a sequence 78 /// void EmitTopLevelThing() { 79 /// SourceLocationSequence::State Seq; 80 /// EmitContainedThing(Seq); 81 /// EmitRecursiveThing(Seq); 82 /// } 83 /// 84 /// // optionally part of a sequence 85 /// void EmitContainedThing(SourceLocationSequence *Seq = nullptr) { 86 /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); 87 /// } 88 /// 89 /// // establishes a sequence if there isn't one already 90 /// void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) { 91 /// SourceLocationSequence::State Seq(ParentSeq); 92 /// Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq)); 93 /// EmitRecursiveThing(Seq); 94 /// } 95 /// 96 class SourceLocationSequence { 97 using UIntTy = SourceLocation::UIntTy; 98 using EncodedTy = uint64_t; 99 constexpr static auto UIntBits = SourceLocationEncoding::UIntBits; 100 static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!"); 101 102 // Prev stores the rotated last nonzero location. 103 UIntTy &Prev; 104 105 // Zig-zag encoding turns small signed integers into small unsigned integers. 106 // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ... 107 static UIntTy zigZag(UIntTy V) { 108 UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0); 109 return Sign ^ (V << 1); 110 } 111 static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); } 112 113 SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {} 114 115 EncodedTy encodeRaw(UIntTy Raw) { 116 if (Raw == 0) 117 return 0; 118 UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw); 119 if (Prev == 0) 120 return Prev = Rotated; 121 UIntTy Delta = Rotated - Prev; 122 Prev = Rotated; 123 // Exactly one 33 bit value is possible! (1 << 32). 124 // This is because we have two representations of zero: trivial & relative. 125 return 1 + EncodedTy{zigZag(Delta)}; 126 } 127 UIntTy decodeRaw(EncodedTy Encoded) { 128 if (Encoded == 0) 129 return 0; 130 if (Prev == 0) 131 return SourceLocationEncoding::decodeRaw(Prev = Encoded); 132 return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1)); 133 } 134 135 public: 136 SourceLocation decode(EncodedTy Encoded) { 137 return SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); 138 } 139 EncodedTy encode(SourceLocation Loc) { 140 return encodeRaw(Loc.getRawEncoding()); 141 } 142 143 class State; 144 }; 145 146 /// This object establishes a SourceLocationSequence. 147 class SourceLocationSequence::State { 148 UIntTy Prev = 0; 149 SourceLocationSequence Seq; 150 151 public: 152 // If Parent is provided and non-null, then this root becomes part of that 153 // enclosing sequence instead of establishing a new one. 154 State(SourceLocationSequence *Parent = nullptr) 155 : Seq(Parent ? Parent->Prev : Prev) {} 156 157 // Implicit conversion for uniform use of roots vs propagated sequences. 158 operator SourceLocationSequence *() { return &Seq; } 159 }; 160 161 inline SourceLocationEncoding::RawLocEncoding 162 SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset, 163 unsigned BaseModuleFileIndex, 164 SourceLocationSequence *Seq) { 165 // If the source location is a local source location, we can try to optimize 166 // the similar sequences to only record the differences. 167 if (!BaseOffset) 168 return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding()); 169 170 if (Loc.isInvalid()) 171 return 0; 172 173 // Otherwise, the higher bits are used to store the module file index, 174 // so it is meaningless to optimize the source locations into small 175 // integers. Let's try to always use the raw encodings. 176 assert(Loc.getOffset() >= BaseOffset); 177 Loc = Loc.getLocWithOffset(-BaseOffset); 178 RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding()); 179 180 // 16 bits should be sufficient to store the module file index. 181 assert(BaseModuleFileIndex < (1 << 16)); 182 Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32; 183 return Encoded; 184 } 185 inline std::pair<SourceLocation, unsigned> 186 SourceLocationEncoding::decode(RawLocEncoding Encoded, 187 SourceLocationSequence *Seq) { 188 unsigned ModuleFileIndex = Encoded >> 32; 189 190 if (!ModuleFileIndex) 191 return {Seq ? Seq->decode(Encoded) 192 : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)), 193 ModuleFileIndex}; 194 195 Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32); 196 SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded)); 197 198 return {Loc, ModuleFileIndex}; 199 } 200 201 } // namespace clang 202 #endif 203