xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 8411e168f8ad4ed51ef39ca1affdf30b84bf94bd)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "llvm/Support/Compiler.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/System/Path.h"
19 #include "llvm/Bitcode/Serialize.h"
20 #include "llvm/Bitcode/Deserialize.h"
21 #include "llvm/Support/Streams.h"
22 #include <algorithm>
23 using namespace clang;
24 using namespace SrcMgr;
25 using llvm::MemoryBuffer;
26 
27 //===----------------------------------------------------------------------===//
28 // SourceManager Helper Classes
29 //===----------------------------------------------------------------------===//
30 
31 ContentCache::~ContentCache() {
32   delete Buffer;
33 }
34 
35 /// getSizeBytesMapped - Returns the number of bytes actually mapped for
36 ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
37 ///  instantiated.
38 unsigned ContentCache::getSizeBytesMapped() const {
39   return Buffer ? Buffer->getBufferSize() : 0;
40 }
41 
42 /// getSize - Returns the size of the content encapsulated by this ContentCache.
43 ///  This can be the size of the source file or the size of an arbitrary
44 ///  scratch buffer.  If the ContentCache encapsulates a source file, that
45 ///  file is not lazily brought in from disk to satisfy this query.
46 unsigned ContentCache::getSize() const {
47   return Entry ? Entry->getSize() : Buffer->getBufferSize();
48 }
49 
50 const llvm::MemoryBuffer *ContentCache::getBuffer() const {
51   // Lazily create the Buffer for ContentCaches that wrap files.
52   if (!Buffer && Entry) {
53     // FIXME: Should we support a way to not have to do this check over
54     //   and over if we cannot open the file?
55     Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
56   }
57   return Buffer;
58 }
59 
60 //===----------------------------------------------------------------------===//
61 // Line Table Implementation
62 //===----------------------------------------------------------------------===//
63 
64 namespace clang {
65 struct LineEntry {
66   /// FileOffset - The offset in this file that the line entry occurs at.
67   unsigned FileOffset;
68   /// LineNo - The presumed line number of this line entry: #line 4.
69   unsigned LineNo;
70   /// FilenameID - The ID of the filename identified by this line entry:
71   /// #line 4 "foo.c".  This is -1 if not specified.
72   int FilenameID;
73 
74   static LineEntry get(unsigned Offs, unsigned Line, int Filename) {
75     LineEntry E;
76     E.FileOffset = Offs;
77     E.LineNo = Line;
78     E.FilenameID = Filename;
79     return E;
80   }
81 };
82 
83 
84 /// LineTableInfo - This class is used to hold and unique data used to
85 /// represent #line information.
86 class LineTableInfo {
87   /// FilenameIDs - This map is used to assign unique IDs to filenames in
88   /// #line directives.  This allows us to unique the filenames that
89   /// frequently reoccur and reference them with indices.  FilenameIDs holds
90   /// the mapping from string -> ID, and FilenamesByID holds the mapping of ID
91   /// to string.
92   llvm::StringMap<unsigned, llvm::BumpPtrAllocator> FilenameIDs;
93   std::vector<llvm::StringMapEntry<unsigned>*> FilenamesByID;
94 
95   /// LineEntries - This is a map from FileIDs to a list of line entries (sorted
96   /// by the offset they occur in the file.
97   std::map<unsigned, std::vector<LineEntry> > LineEntries;
98 public:
99   LineTableInfo() {
100   }
101 
102   void clear() {
103     FilenameIDs.clear();
104     FilenamesByID.clear();
105   }
106 
107   ~LineTableInfo() {}
108 
109   unsigned getLineTableFilenameID(const char *Ptr, unsigned Len);
110   const char *getFilename(unsigned ID) const {
111     assert(ID < FilenamesByID.size() && "Invalid FilenameID");
112     return FilenamesByID[ID]->getKeyData();
113   }
114 
115   void AddLineNote(unsigned FID, unsigned Offset,
116                    unsigned LineNo, int FilenameID);
117 
118   /// FindNearestLineEntry - Find the line entry nearest to FID that is before
119   /// it.  If there is no line entry before Offset in FID, return null.
120   const LineEntry *FindNearestLineEntry(unsigned FID, unsigned Offset);
121 };
122 } // namespace clang
123 
124 unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
125   // Look up the filename in the string table, returning the pre-existing value
126   // if it exists.
127   llvm::StringMapEntry<unsigned> &Entry =
128     FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
129   if (Entry.getValue() != ~0U)
130     return Entry.getValue();
131 
132   // Otherwise, assign this the next available ID.
133   Entry.setValue(FilenamesByID.size());
134   FilenamesByID.push_back(&Entry);
135   return FilenamesByID.size()-1;
136 }
137 
138 /// AddLineNote - Add a line note to the line table that indicates that there
139 /// is a #line at the specified FID/Offset location which changes the presumed
140 /// location to LineNo/FilenameID.
141 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
142                                 unsigned LineNo, int FilenameID) {
143   std::vector<LineEntry> &Entries = LineEntries[FID];
144 
145   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
146          "Adding line entries out of order!");
147 
148   // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember that
149   // we are still in "foo.h".
150   if (FilenameID == -1 && !Entries.empty())
151     FilenameID = Entries.back().FilenameID;
152 
153   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID));
154 }
155 
156 /// FindNearestLineEntry - Find the line entry nearest to FID that is before
157 /// it.  If there is no line entry before Offset in FID, return null.
158 const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
159                                                      unsigned Offset) {
160   const std::vector<LineEntry> &Entries = LineEntries[FID];
161   assert(!Entries.empty() && "No #line entries for this FID after all!");
162 
163 
164   // FIXME: Dumb linear search.
165   // Find the maximal element that is still before Offset.
166   for (std::vector<LineEntry>::const_reverse_iterator I = Entries.rbegin(),
167        E = Entries.rend(); I != E; ++I)
168     if (I->FileOffset <= Offset) return &*I;
169   return 0;
170 }
171 
172 
173 /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
174 ///
175 unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
176   if (LineTable == 0)
177     LineTable = new LineTableInfo();
178   return LineTable->getLineTableFilenameID(Ptr, Len);
179 }
180 
181 
182 /// AddLineNote - Add a line note to the line table for the FileID and offset
183 /// specified by Loc.  If FilenameID is -1, it is considered to be
184 /// unspecified.
185 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
186                                 int FilenameID) {
187   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
188 
189   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
190 
191   // Remember that this file has #line directives now if it doesn't already.
192   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
193 
194   if (LineTable == 0)
195     LineTable = new LineTableInfo();
196   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
197 }
198 
199 
200 //===----------------------------------------------------------------------===//
201 // Private 'Create' methods.
202 //===----------------------------------------------------------------------===//
203 
204 SourceManager::~SourceManager() {
205   delete LineTable;
206 
207   // Delete FileEntry objects corresponding to content caches.  Since the actual
208   // content cache objects are bump pointer allocated, we just have to run the
209   // dtors, but we call the deallocate method for completeness.
210   for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
211     MemBufferInfos[i]->~ContentCache();
212     ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
213   }
214   for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
215        I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
216     I->second->~ContentCache();
217     ContentCacheAlloc.Deallocate(I->second);
218   }
219 }
220 
221 void SourceManager::clearIDTables() {
222   MainFileID = FileID();
223   SLocEntryTable.clear();
224   LastLineNoFileIDQuery = FileID();
225   LastLineNoContentCache = 0;
226   LastFileIDLookup = FileID();
227 
228   if (LineTable)
229     LineTable->clear();
230 
231   // Use up FileID #0 as an invalid instantiation.
232   NextOffset = 0;
233   createInstantiationLoc(SourceLocation(), SourceLocation(), 1);
234 }
235 
236 /// getOrCreateContentCache - Create or return a cached ContentCache for the
237 /// specified file.
238 const ContentCache *
239 SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
240   assert(FileEnt && "Didn't specify a file entry to use?");
241 
242   // Do we already have information about this file?
243   ContentCache *&Entry = FileInfos[FileEnt];
244   if (Entry) return Entry;
245 
246   // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
247   // so that FileInfo can use the low 3 bits of the pointer for its own
248   // nefarious purposes.
249   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
250   EntryAlign = std::max(8U, EntryAlign);
251   Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
252   new (Entry) ContentCache(FileEnt);
253   return Entry;
254 }
255 
256 
257 /// createMemBufferContentCache - Create a new ContentCache for the specified
258 ///  memory buffer.  This does no caching.
259 const ContentCache*
260 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
261   // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
262   // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
263   // the pointer for its own nefarious purposes.
264   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
265   EntryAlign = std::max(8U, EntryAlign);
266   ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
267   new (Entry) ContentCache();
268   MemBufferInfos.push_back(Entry);
269   Entry->setBuffer(Buffer);
270   return Entry;
271 }
272 
273 //===----------------------------------------------------------------------===//
274 // Methods to create new FileID's and instantiations.
275 //===----------------------------------------------------------------------===//
276 
277 /// createFileID - Create a new fileID for the specified ContentCache and
278 /// include position.  This works regardless of whether the ContentCache
279 /// corresponds to a file or some other input source.
280 FileID SourceManager::createFileID(const ContentCache *File,
281                                    SourceLocation IncludePos,
282                                    SrcMgr::CharacteristicKind FileCharacter) {
283   SLocEntryTable.push_back(SLocEntry::get(NextOffset,
284                                           FileInfo::get(IncludePos, File,
285                                                         FileCharacter)));
286   unsigned FileSize = File->getSize();
287   assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
288   NextOffset += FileSize+1;
289 
290   // Set LastFileIDLookup to the newly created file.  The next getFileID call is
291   // almost guaranteed to be from that file.
292   return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
293 }
294 
295 /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
296 /// that a token from SpellingLoc should actually be referenced from
297 /// InstantiationLoc.
298 SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
299                                                      SourceLocation InstantLoc,
300                                                      unsigned TokLength) {
301   SLocEntryTable.push_back(SLocEntry::get(NextOffset,
302                                           InstantiationInfo::get(InstantLoc,
303                                                                  SpellingLoc)));
304   assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
305   NextOffset += TokLength+1;
306   return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
307 }
308 
309 /// getBufferData - Return a pointer to the start and end of the source buffer
310 /// data for the specified FileID.
311 std::pair<const char*, const char*>
312 SourceManager::getBufferData(FileID FID) const {
313   const llvm::MemoryBuffer *Buf = getBuffer(FID);
314   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
315 }
316 
317 
318 //===----------------------------------------------------------------------===//
319 // SourceLocation manipulation methods.
320 //===----------------------------------------------------------------------===//
321 
322 /// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
323 /// method that is used for all SourceManager queries that start with a
324 /// SourceLocation object.  It is responsible for finding the entry in
325 /// SLocEntryTable which contains the specified location.
326 ///
327 FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
328   assert(SLocOffset && "Invalid FileID");
329 
330   // After the first and second level caches, I see two common sorts of
331   // behavior: 1) a lot of searched FileID's are "near" the cached file location
332   // or are "near" the cached instantiation location.  2) others are just
333   // completely random and may be a very long way away.
334   //
335   // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
336   // then we fall back to a less cache efficient, but more scalable, binary
337   // search to find the location.
338 
339   // See if this is near the file point - worst case we start scanning from the
340   // most newly created FileID.
341   std::vector<SrcMgr::SLocEntry>::const_iterator I;
342 
343   if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
344     // Neither loc prunes our search.
345     I = SLocEntryTable.end();
346   } else {
347     // Perhaps it is near the file point.
348     I = SLocEntryTable.begin()+LastFileIDLookup.ID;
349   }
350 
351   // Find the FileID that contains this.  "I" is an iterator that points to a
352   // FileID whose offset is known to be larger than SLocOffset.
353   unsigned NumProbes = 0;
354   while (1) {
355     --I;
356     if (I->getOffset() <= SLocOffset) {
357 #if 0
358       printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
359              I-SLocEntryTable.begin(),
360              I->isInstantiation() ? "inst" : "file",
361              LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
362 #endif
363       FileID Res = FileID::get(I-SLocEntryTable.begin());
364 
365       // If this isn't an instantiation, remember it.  We have good locality
366       // across FileID lookups.
367       if (!I->isInstantiation())
368         LastFileIDLookup = Res;
369       NumLinearScans += NumProbes+1;
370       return Res;
371     }
372     if (++NumProbes == 8)
373       break;
374   }
375 
376   // Convert "I" back into an index.  We know that it is an entry whose index is
377   // larger than the offset we are looking for.
378   unsigned GreaterIndex = I-SLocEntryTable.begin();
379   // LessIndex - This is the lower bound of the range that we're searching.
380   // We know that the offset corresponding to the FileID is is less than
381   // SLocOffset.
382   unsigned LessIndex = 0;
383   NumProbes = 0;
384   while (1) {
385     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
386     unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
387 
388     ++NumProbes;
389 
390     // If the offset of the midpoint is too large, chop the high side of the
391     // range to the midpoint.
392     if (MidOffset > SLocOffset) {
393       GreaterIndex = MiddleIndex;
394       continue;
395     }
396 
397     // If the middle index contains the value, succeed and return.
398     if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
399 #if 0
400       printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
401              I-SLocEntryTable.begin(),
402              I->isInstantiation() ? "inst" : "file",
403              LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
404 #endif
405       FileID Res = FileID::get(MiddleIndex);
406 
407       // If this isn't an instantiation, remember it.  We have good locality
408       // across FileID lookups.
409       if (!I->isInstantiation())
410         LastFileIDLookup = Res;
411       NumBinaryProbes += NumProbes;
412       return Res;
413     }
414 
415     // Otherwise, move the low-side up to the middle index.
416     LessIndex = MiddleIndex;
417   }
418 }
419 
420 SourceLocation SourceManager::
421 getInstantiationLocSlowCase(SourceLocation Loc) const {
422   do {
423     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
424     Loc =getSLocEntry(LocInfo.first).getInstantiation().getInstantiationLoc();
425     Loc = Loc.getFileLocWithOffset(LocInfo.second);
426   } while (!Loc.isFileID());
427 
428   return Loc;
429 }
430 
431 SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
432   do {
433     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
434     Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
435     Loc = Loc.getFileLocWithOffset(LocInfo.second);
436   } while (!Loc.isFileID());
437   return Loc;
438 }
439 
440 
441 std::pair<FileID, unsigned>
442 SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
443                                                      unsigned Offset) const {
444   // If this is an instantiation record, walk through all the instantiation
445   // points.
446   FileID FID;
447   SourceLocation Loc;
448   do {
449     Loc = E->getInstantiation().getInstantiationLoc();
450 
451     FID = getFileID(Loc);
452     E = &getSLocEntry(FID);
453     Offset += Loc.getOffset()-E->getOffset();
454   } while (!Loc.isFileID());
455 
456   return std::make_pair(FID, Offset);
457 }
458 
459 std::pair<FileID, unsigned>
460 SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
461                                                 unsigned Offset) const {
462   // If this is an instantiation record, walk through all the instantiation
463   // points.
464   FileID FID;
465   SourceLocation Loc;
466   do {
467     Loc = E->getInstantiation().getSpellingLoc();
468 
469     FID = getFileID(Loc);
470     E = &getSLocEntry(FID);
471     Offset += Loc.getOffset()-E->getOffset();
472   } while (!Loc.isFileID());
473 
474   return std::make_pair(FID, Offset);
475 }
476 
477 
478 //===----------------------------------------------------------------------===//
479 // Queries about the code at a SourceLocation.
480 //===----------------------------------------------------------------------===//
481 
482 /// getCharacterData - Return a pointer to the start of the specified location
483 /// in the appropriate MemoryBuffer.
484 const char *SourceManager::getCharacterData(SourceLocation SL) const {
485   // Note that this is a hot function in the getSpelling() path, which is
486   // heavily used by -E mode.
487   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
488 
489   // Note that calling 'getBuffer()' may lazily page in a source file.
490   return getSLocEntry(LocInfo.first).getFile().getContentCache()
491               ->getBuffer()->getBufferStart() + LocInfo.second;
492 }
493 
494 
495 /// getColumnNumber - Return the column # for the specified file position.
496 /// this is significantly cheaper to compute than the line number.
497 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
498   const char *Buf = getBuffer(FID)->getBufferStart();
499 
500   unsigned LineStart = FilePos;
501   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
502     --LineStart;
503   return FilePos-LineStart+1;
504 }
505 
506 unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
507   if (Loc.isInvalid()) return 0;
508   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
509   return getColumnNumber(LocInfo.first, LocInfo.second);
510 }
511 
512 unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
513   if (Loc.isInvalid()) return 0;
514   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
515   return getColumnNumber(LocInfo.first, LocInfo.second);
516 }
517 
518 
519 
520 static void ComputeLineNumbers(ContentCache* FI,
521                                llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
522 static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
523   // Note that calling 'getBuffer()' may lazily page in the file.
524   const MemoryBuffer *Buffer = FI->getBuffer();
525 
526   // Find the file offsets of all of the *physical* source lines.  This does
527   // not look at trigraphs, escaped newlines, or anything else tricky.
528   std::vector<unsigned> LineOffsets;
529 
530   // Line #1 starts at char 0.
531   LineOffsets.push_back(0);
532 
533   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
534   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
535   unsigned Offs = 0;
536   while (1) {
537     // Skip over the contents of the line.
538     // TODO: Vectorize this?  This is very performance sensitive for programs
539     // with lots of diagnostics and in -E mode.
540     const unsigned char *NextBuf = (const unsigned char *)Buf;
541     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
542       ++NextBuf;
543     Offs += NextBuf-Buf;
544     Buf = NextBuf;
545 
546     if (Buf[0] == '\n' || Buf[0] == '\r') {
547       // If this is \n\r or \r\n, skip both characters.
548       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
549         ++Offs, ++Buf;
550       ++Offs, ++Buf;
551       LineOffsets.push_back(Offs);
552     } else {
553       // Otherwise, this is a null.  If end of file, exit.
554       if (Buf == End) break;
555       // Otherwise, skip the null.
556       ++Offs, ++Buf;
557     }
558   }
559 
560   // Copy the offsets into the FileInfo structure.
561   FI->NumLines = LineOffsets.size();
562   FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
563   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
564 }
565 
566 /// getLineNumber - Given a SourceLocation, return the spelling line number
567 /// for the position indicated.  This requires building and caching a table of
568 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
569 /// about to emit a diagnostic.
570 unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
571   ContentCache *Content;
572   if (LastLineNoFileIDQuery == FID)
573     Content = LastLineNoContentCache;
574   else
575     Content = const_cast<ContentCache*>(getSLocEntry(FID)
576                                         .getFile().getContentCache());
577 
578   // If this is the first use of line information for this buffer, compute the
579   /// SourceLineCache for it on demand.
580   if (Content->SourceLineCache == 0)
581     ComputeLineNumbers(Content, ContentCacheAlloc);
582 
583   // Okay, we know we have a line number table.  Do a binary search to find the
584   // line number that this character position lands on.
585   unsigned *SourceLineCache = Content->SourceLineCache;
586   unsigned *SourceLineCacheStart = SourceLineCache;
587   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
588 
589   unsigned QueriedFilePos = FilePos+1;
590 
591   // If the previous query was to the same file, we know both the file pos from
592   // that query and the line number returned.  This allows us to narrow the
593   // search space from the entire file to something near the match.
594   if (LastLineNoFileIDQuery == FID) {
595     if (QueriedFilePos >= LastLineNoFilePos) {
596       SourceLineCache = SourceLineCache+LastLineNoResult-1;
597 
598       // The query is likely to be nearby the previous one.  Here we check to
599       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
600       // where big comment blocks and vertical whitespace eat up lines but
601       // contribute no tokens.
602       if (SourceLineCache+5 < SourceLineCacheEnd) {
603         if (SourceLineCache[5] > QueriedFilePos)
604           SourceLineCacheEnd = SourceLineCache+5;
605         else if (SourceLineCache+10 < SourceLineCacheEnd) {
606           if (SourceLineCache[10] > QueriedFilePos)
607             SourceLineCacheEnd = SourceLineCache+10;
608           else if (SourceLineCache+20 < SourceLineCacheEnd) {
609             if (SourceLineCache[20] > QueriedFilePos)
610               SourceLineCacheEnd = SourceLineCache+20;
611           }
612         }
613       }
614     } else {
615       SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
616     }
617   }
618 
619   // If the spread is large, do a "radix" test as our initial guess, based on
620   // the assumption that lines average to approximately the same length.
621   // NOTE: This is currently disabled, as it does not appear to be profitable in
622   // initial measurements.
623   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
624     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
625 
626     // Take a stab at guessing where it is.
627     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
628 
629     // Check for -10 and +10 lines.
630     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
631     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
632 
633     // If the computed lower bound is less than the query location, move it in.
634     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
635         SourceLineCacheStart[LowerBound] < QueriedFilePos)
636       SourceLineCache = SourceLineCacheStart+LowerBound;
637 
638     // If the computed upper bound is greater than the query location, move it.
639     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
640         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
641       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
642   }
643 
644   unsigned *Pos
645     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
646   unsigned LineNo = Pos-SourceLineCacheStart;
647 
648   LastLineNoFileIDQuery = FID;
649   LastLineNoContentCache = Content;
650   LastLineNoFilePos = QueriedFilePos;
651   LastLineNoResult = LineNo;
652   return LineNo;
653 }
654 
655 unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
656   if (Loc.isInvalid()) return 0;
657   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
658   return getLineNumber(LocInfo.first, LocInfo.second);
659 }
660 unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
661   if (Loc.isInvalid()) return 0;
662   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
663   return getLineNumber(LocInfo.first, LocInfo.second);
664 }
665 
666 
667 /// getPresumedLoc - This method returns the "presumed" location of a
668 /// SourceLocation specifies.  A "presumed location" can be modified by #line
669 /// or GNU line marker directives.  This provides a view on the data that a
670 /// user should see in diagnostics, for example.
671 ///
672 /// Note that a presumed location is always given as the instantiation point
673 /// of an instantiation location, not at the spelling location.
674 PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
675   if (Loc.isInvalid()) return PresumedLoc();
676 
677   // Presumed locations are always for instantiation points.
678   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
679 
680   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
681   const SrcMgr::ContentCache *C = FI.getContentCache();
682 
683   // To get the source name, first consult the FileEntry (if one exists)
684   // before the MemBuffer as this will avoid unnecessarily paging in the
685   // MemBuffer.
686   const char *Filename =
687     C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
688   unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
689   unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
690   SourceLocation IncludeLoc = FI.getIncludeLoc();
691 
692   // If we have #line directives in this file, update and overwrite the physical
693   // location info if appropriate.
694   if (FI.hasLineDirectives()) {
695     assert(LineTable && "Can't have linetable entries without a LineTable!");
696     // See if there is a #line directive before this.  If so, get it.
697     if (const LineEntry *Entry =
698           LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
699       // If the LineEntry indicates a filename, use it.
700       if (Entry->FilenameID != -1)
701         Filename = LineTable->getFilename(Entry->FilenameID);
702 
703       // Use the line number specified by the LineEntry.  This line number may
704       // be multiple lines down from the line entry.  Add the difference in
705       // physical line numbers from the query point and the line marker to the
706       // total.
707       unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
708       LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
709 
710       // Note that column numbers are not molested by line markers.
711     }
712   }
713 
714   return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
715 }
716 
717 //===----------------------------------------------------------------------===//
718 // Other miscellaneous methods.
719 //===----------------------------------------------------------------------===//
720 
721 
722 /// PrintStats - Print statistics to stderr.
723 ///
724 void SourceManager::PrintStats() const {
725   llvm::cerr << "\n*** Source Manager Stats:\n";
726   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
727              << " mem buffers mapped.\n";
728   llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
729              << NextOffset << "B of Sloc address space used.\n";
730 
731   unsigned NumLineNumsComputed = 0;
732   unsigned NumFileBytesMapped = 0;
733   for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
734     NumLineNumsComputed += I->second->SourceLineCache != 0;
735     NumFileBytesMapped  += I->second->getSizeBytesMapped();
736   }
737 
738   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
739              << NumLineNumsComputed << " files with line #'s computed.\n";
740   llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
741              << NumBinaryProbes << " binary.\n";
742 }
743 
744 //===----------------------------------------------------------------------===//
745 // Serialization.
746 //===----------------------------------------------------------------------===//
747 
748 void ContentCache::Emit(llvm::Serializer& S) const {
749   S.FlushRecord();
750   S.EmitPtr(this);
751 
752   if (Entry) {
753     llvm::sys::Path Fname(Buffer->getBufferIdentifier());
754 
755     if (Fname.isAbsolute())
756       S.EmitCStr(Fname.c_str());
757     else {
758       // Create an absolute path.
759       // FIXME: This will potentially contain ".." and "." in the path.
760       llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
761       path.appendComponent(Fname.c_str());
762       S.EmitCStr(path.c_str());
763     }
764   }
765   else {
766     const char* p = Buffer->getBufferStart();
767     const char* e = Buffer->getBufferEnd();
768 
769     S.EmitInt(e-p);
770 
771     for ( ; p != e; ++p)
772       S.EmitInt(*p);
773   }
774 
775   S.FlushRecord();
776 }
777 
778 void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
779                                        SourceManager& SMgr,
780                                        FileManager* FMgr,
781                                        std::vector<char>& Buf) {
782   if (FMgr) {
783     llvm::SerializedPtrID PtrID = D.ReadPtrID();
784     D.ReadCStr(Buf,false);
785 
786     // Create/fetch the FileEntry.
787     const char* start = &Buf[0];
788     const FileEntry* E = FMgr->getFile(start,start+Buf.size());
789 
790     // FIXME: Ideally we want a lazy materialization of the ContentCache
791     //  anyway, because we don't want to read in source files unless this
792     //  is absolutely needed.
793     if (!E)
794       D.RegisterPtr(PtrID,NULL);
795     else
796       // Get the ContextCache object and register it with the deserializer.
797       D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
798     return;
799   }
800 
801   // Register the ContextCache object with the deserializer.
802   /* FIXME:
803   ContentCache *Entry
804   SMgr.MemBufferInfos.push_back(ContentCache());
805    = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
806   D.RegisterPtr(&Entry);
807 
808   // Create the buffer.
809   unsigned Size = D.ReadInt();
810   Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
811 
812   // Read the contents of the buffer.
813   char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
814   for (unsigned i = 0; i < Size ; ++i)
815     p[i] = D.ReadInt();
816    */
817 }
818 
819 void SourceManager::Emit(llvm::Serializer& S) const {
820   S.EnterBlock();
821   S.EmitPtr(this);
822   S.EmitInt(MainFileID.getOpaqueValue());
823 
824   // Emit: FileInfos.  Just emit the file name.
825   S.EnterBlock();
826 
827   // FIXME: Emit FileInfos.
828   //std::for_each(FileInfos.begin(), FileInfos.end(),
829   //              S.MakeEmitter<ContentCache>());
830 
831   S.ExitBlock();
832 
833   // Emit: MemBufferInfos
834   S.EnterBlock();
835 
836   /* FIXME: EMIT.
837   std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
838                 S.MakeEmitter<ContentCache>());
839    */
840 
841   S.ExitBlock();
842 
843   // FIXME: Emit SLocEntryTable.
844 
845   S.ExitBlock();
846 }
847 
848 SourceManager*
849 SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
850   SourceManager *M = new SourceManager();
851   D.RegisterPtr(M);
852 
853   // Read: the FileID of the main source file of the translation unit.
854   M->MainFileID = FileID::get(D.ReadInt());
855 
856   std::vector<char> Buf;
857 
858   /*{ // FIXME Read: FileInfos.
859     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
860     while (!D.FinishedBlock(BLoc))
861     ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
862   }*/
863 
864   { // Read: MemBufferInfos.
865     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
866     while (!D.FinishedBlock(BLoc))
867     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
868   }
869 
870   // FIXME: Read SLocEntryTable.
871 
872   return M;
873 }
874