xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 1ac3c9292f7172609cd7c82e2f749ed9de720bb7)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "llvm/Support/Compiler.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/System/Path.h"
19 #include "llvm/Bitcode/Serialize.h"
20 #include "llvm/Bitcode/Deserialize.h"
21 #include "llvm/Support/Streams.h"
22 #include <algorithm>
23 using namespace clang;
24 using namespace SrcMgr;
25 using llvm::MemoryBuffer;
26 
27 //===----------------------------------------------------------------------===//
28 // SourceManager Helper Classes
29 //===----------------------------------------------------------------------===//
30 
31 ContentCache::~ContentCache() {
32   delete Buffer;
33 }
34 
35 /// getSizeBytesMapped - Returns the number of bytes actually mapped for
36 ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
37 ///  instantiated.
38 unsigned ContentCache::getSizeBytesMapped() const {
39   return Buffer ? Buffer->getBufferSize() : 0;
40 }
41 
42 /// getSize - Returns the size of the content encapsulated by this ContentCache.
43 ///  This can be the size of the source file or the size of an arbitrary
44 ///  scratch buffer.  If the ContentCache encapsulates a source file, that
45 ///  file is not lazily brought in from disk to satisfy this query.
46 unsigned ContentCache::getSize() const {
47   return Entry ? Entry->getSize() : Buffer->getBufferSize();
48 }
49 
50 const llvm::MemoryBuffer *ContentCache::getBuffer() const {
51   // Lazily create the Buffer for ContentCaches that wrap files.
52   if (!Buffer && Entry) {
53     // FIXME: Should we support a way to not have to do this check over
54     //   and over if we cannot open the file?
55     Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
56   }
57   return Buffer;
58 }
59 
60 //===----------------------------------------------------------------------===//
61 // Line Table Implementation
62 //===----------------------------------------------------------------------===//
63 
64 namespace clang {
65 struct LineEntry {
66   /// FileOffset - The offset in this file that the line entry occurs at.
67   unsigned FileOffset;
68 
69   /// LineNo - The presumed line number of this line entry: #line 4.
70   unsigned LineNo;
71 
72   /// FilenameID - The ID of the filename identified by this line entry:
73   /// #line 4 "foo.c".  This is -1 if not specified.
74   int FilenameID;
75 
76   /// Flags - Set the 0 if no flags, 1 if a system header,
77   SrcMgr::CharacteristicKind FileKind;
78 
79   /// IncludeOffset - This is the offset of the virtual include stack location,
80   /// which is manipulated by GNU linemarker directives.  If this is 0 then
81   /// there is no virtual #includer.
82   unsigned IncludeOffset;
83 
84   static LineEntry get(unsigned Offs, unsigned Line, int Filename,
85                        SrcMgr::CharacteristicKind FileKind,
86                        unsigned IncludeOffset) {
87     LineEntry E;
88     E.FileOffset = Offs;
89     E.LineNo = Line;
90     E.FilenameID = Filename;
91     E.FileKind = FileKind;
92     E.IncludeOffset = IncludeOffset;
93     return E;
94   }
95 };
96 
97 // needed for FindNearestLineEntry (upper_bound of LineEntry)
98 inline bool operator<(const LineEntry &lhs, const LineEntry &rhs) {
99   // FIXME: should check the other field?
100   return lhs.FileOffset < rhs.FileOffset;
101 }
102 
103 inline bool operator<(const LineEntry &E, unsigned Offset) {
104   return E.FileOffset < Offset;
105 }
106 
107 inline bool operator<(unsigned Offset, const LineEntry &E) {
108   return Offset < E.FileOffset;
109 }
110 
111 /// LineTableInfo - This class is used to hold and unique data used to
112 /// represent #line information.
113 class LineTableInfo {
114   /// FilenameIDs - This map is used to assign unique IDs to filenames in
115   /// #line directives.  This allows us to unique the filenames that
116   /// frequently reoccur and reference them with indices.  FilenameIDs holds
117   /// the mapping from string -> ID, and FilenamesByID holds the mapping of ID
118   /// to string.
119   llvm::StringMap<unsigned, llvm::BumpPtrAllocator> FilenameIDs;
120   std::vector<llvm::StringMapEntry<unsigned>*> FilenamesByID;
121 
122   /// LineEntries - This is a map from FileIDs to a list of line entries (sorted
123   /// by the offset they occur in the file.
124   std::map<unsigned, std::vector<LineEntry> > LineEntries;
125 public:
126   LineTableInfo() {
127   }
128 
129   void clear() {
130     FilenameIDs.clear();
131     FilenamesByID.clear();
132     LineEntries.clear();
133   }
134 
135   ~LineTableInfo() {}
136 
137   unsigned getLineTableFilenameID(const char *Ptr, unsigned Len);
138   const char *getFilename(unsigned ID) const {
139     assert(ID < FilenamesByID.size() && "Invalid FilenameID");
140     return FilenamesByID[ID]->getKeyData();
141   }
142 
143   void AddLineNote(unsigned FID, unsigned Offset,
144                    unsigned LineNo, int FilenameID);
145   void AddLineNote(unsigned FID, unsigned Offset,
146                    unsigned LineNo, int FilenameID,
147                    unsigned EntryExit, SrcMgr::CharacteristicKind FileKind);
148 
149 
150   /// FindNearestLineEntry - Find the line entry nearest to FID that is before
151   /// it.  If there is no line entry before Offset in FID, return null.
152   const LineEntry *FindNearestLineEntry(unsigned FID, unsigned Offset);
153 };
154 } // namespace clang
155 
156 unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
157   // Look up the filename in the string table, returning the pre-existing value
158   // if it exists.
159   llvm::StringMapEntry<unsigned> &Entry =
160     FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
161   if (Entry.getValue() != ~0U)
162     return Entry.getValue();
163 
164   // Otherwise, assign this the next available ID.
165   Entry.setValue(FilenamesByID.size());
166   FilenamesByID.push_back(&Entry);
167   return FilenamesByID.size()-1;
168 }
169 
170 /// AddLineNote - Add a line note to the line table that indicates that there
171 /// is a #line at the specified FID/Offset location which changes the presumed
172 /// location to LineNo/FilenameID.
173 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
174                                 unsigned LineNo, int FilenameID) {
175   std::vector<LineEntry> &Entries = LineEntries[FID];
176 
177   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
178          "Adding line entries out of order!");
179 
180   SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
181   unsigned IncludeOffset = 0;
182 
183   if (!Entries.empty()) {
184     // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
185     // that we are still in "foo.h".
186     if (FilenameID == -1)
187       FilenameID = Entries.back().FilenameID;
188 
189     // If we are after a line marker that switched us to system header mode, or
190     // that set #include information, preserve it.
191     Kind = Entries.back().FileKind;
192     IncludeOffset = Entries.back().IncludeOffset;
193   }
194 
195   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
196                                    IncludeOffset));
197 }
198 
199 /// AddLineNote This is the same as the previous version of AddLineNote, but is
200 /// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
201 /// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
202 /// this is a file exit.  FileKind specifies whether this is a system header or
203 /// extern C system header.
204 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
205                                 unsigned LineNo, int FilenameID,
206                                 unsigned EntryExit,
207                                 SrcMgr::CharacteristicKind FileKind) {
208   assert(FilenameID != -1 && "Unspecified filename should use other accessor");
209 
210   std::vector<LineEntry> &Entries = LineEntries[FID];
211 
212   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
213          "Adding line entries out of order!");
214 
215   unsigned IncludeOffset = 0;
216   if (EntryExit == 0) {  // No #include stack change.
217     IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
218   } else if (EntryExit == 1) {
219     IncludeOffset = Offset-1;
220   } else if (EntryExit == 2) {
221     assert(!Entries.empty() && Entries.back().IncludeOffset &&
222        "PPDirectives should have caught case when popping empty include stack");
223 
224     // Get the include loc of the last entries' include loc as our include loc.
225     IncludeOffset = 0;
226     if (const LineEntry *PrevEntry =
227           FindNearestLineEntry(FID, Entries.back().IncludeOffset))
228       IncludeOffset = PrevEntry->IncludeOffset;
229   }
230 
231   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
232                                    IncludeOffset));
233 }
234 
235 
236 /// FindNearestLineEntry - Find the line entry nearest to FID that is before
237 /// it.  If there is no line entry before Offset in FID, return null.
238 const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
239                                                      unsigned Offset) {
240   const std::vector<LineEntry> &Entries = LineEntries[FID];
241   assert(!Entries.empty() && "No #line entries for this FID after all!");
242 
243   // It is very common for the query to be after the last #line, check this
244   // first.
245   if (Entries.back().FileOffset <= Offset)
246     return &Entries.back();
247 
248   // Do a binary search to find the maximal element that is still before Offset.
249   std::vector<LineEntry>::const_iterator I =
250     std::upper_bound(Entries.begin(), Entries.end(), Offset);
251   if (I == Entries.begin()) return 0;
252   return &*--I;
253 }
254 
255 
256 /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
257 ///
258 unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
259   if (LineTable == 0)
260     LineTable = new LineTableInfo();
261   return LineTable->getLineTableFilenameID(Ptr, Len);
262 }
263 
264 
265 /// AddLineNote - Add a line note to the line table for the FileID and offset
266 /// specified by Loc.  If FilenameID is -1, it is considered to be
267 /// unspecified.
268 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
269                                 int FilenameID) {
270   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
271 
272   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
273 
274   // Remember that this file has #line directives now if it doesn't already.
275   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
276 
277   if (LineTable == 0)
278     LineTable = new LineTableInfo();
279   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
280 }
281 
282 /// AddLineNote - Add a GNU line marker to the line table.
283 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
284                                 int FilenameID, bool IsFileEntry,
285                                 bool IsFileExit, bool IsSystemHeader,
286                                 bool IsExternCHeader) {
287   // If there is no filename and no flags, this is treated just like a #line,
288   // which does not change the flags of the previous line marker.
289   if (FilenameID == -1) {
290     assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
291            "Can't set flags without setting the filename!");
292     return AddLineNote(Loc, LineNo, FilenameID);
293   }
294 
295   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
296   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
297 
298   // Remember that this file has #line directives now if it doesn't already.
299   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
300 
301   if (LineTable == 0)
302     LineTable = new LineTableInfo();
303 
304   SrcMgr::CharacteristicKind FileKind;
305   if (IsExternCHeader)
306     FileKind = SrcMgr::C_ExternCSystem;
307   else if (IsSystemHeader)
308     FileKind = SrcMgr::C_System;
309   else
310     FileKind = SrcMgr::C_User;
311 
312   unsigned EntryExit = 0;
313   if (IsFileEntry)
314     EntryExit = 1;
315   else if (IsFileExit)
316     EntryExit = 2;
317 
318   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
319                          EntryExit, FileKind);
320 }
321 
322 
323 //===----------------------------------------------------------------------===//
324 // Private 'Create' methods.
325 //===----------------------------------------------------------------------===//
326 
327 SourceManager::~SourceManager() {
328   delete LineTable;
329 
330   // Delete FileEntry objects corresponding to content caches.  Since the actual
331   // content cache objects are bump pointer allocated, we just have to run the
332   // dtors, but we call the deallocate method for completeness.
333   for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
334     MemBufferInfos[i]->~ContentCache();
335     ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
336   }
337   for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
338        I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
339     I->second->~ContentCache();
340     ContentCacheAlloc.Deallocate(I->second);
341   }
342 }
343 
344 void SourceManager::clearIDTables() {
345   MainFileID = FileID();
346   SLocEntryTable.clear();
347   LastLineNoFileIDQuery = FileID();
348   LastLineNoContentCache = 0;
349   LastFileIDLookup = FileID();
350 
351   if (LineTable)
352     LineTable->clear();
353 
354   // Use up FileID #0 as an invalid instantiation.
355   NextOffset = 0;
356   createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
357 }
358 
359 /// getOrCreateContentCache - Create or return a cached ContentCache for the
360 /// specified file.
361 const ContentCache *
362 SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
363   assert(FileEnt && "Didn't specify a file entry to use?");
364 
365   // Do we already have information about this file?
366   ContentCache *&Entry = FileInfos[FileEnt];
367   if (Entry) return Entry;
368 
369   // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
370   // so that FileInfo can use the low 3 bits of the pointer for its own
371   // nefarious purposes.
372   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
373   EntryAlign = std::max(8U, EntryAlign);
374   Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
375   new (Entry) ContentCache(FileEnt);
376   return Entry;
377 }
378 
379 
380 /// createMemBufferContentCache - Create a new ContentCache for the specified
381 ///  memory buffer.  This does no caching.
382 const ContentCache*
383 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
384   // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
385   // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
386   // the pointer for its own nefarious purposes.
387   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
388   EntryAlign = std::max(8U, EntryAlign);
389   ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
390   new (Entry) ContentCache();
391   MemBufferInfos.push_back(Entry);
392   Entry->setBuffer(Buffer);
393   return Entry;
394 }
395 
396 //===----------------------------------------------------------------------===//
397 // Methods to create new FileID's and instantiations.
398 //===----------------------------------------------------------------------===//
399 
400 /// createFileID - Create a new fileID for the specified ContentCache and
401 /// include position.  This works regardless of whether the ContentCache
402 /// corresponds to a file or some other input source.
403 FileID SourceManager::createFileID(const ContentCache *File,
404                                    SourceLocation IncludePos,
405                                    SrcMgr::CharacteristicKind FileCharacter) {
406   SLocEntryTable.push_back(SLocEntry::get(NextOffset,
407                                           FileInfo::get(IncludePos, File,
408                                                         FileCharacter)));
409   unsigned FileSize = File->getSize();
410   assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
411   NextOffset += FileSize+1;
412 
413   // Set LastFileIDLookup to the newly created file.  The next getFileID call is
414   // almost guaranteed to be from that file.
415   return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
416 }
417 
418 /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
419 /// that a token from SpellingLoc should actually be referenced from
420 /// InstantiationLoc.
421 SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
422                                                      SourceLocation ILocStart,
423                                                      SourceLocation ILocEnd,
424                                                      unsigned TokLength) {
425   InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
426   SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
427   assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
428   NextOffset += TokLength+1;
429   return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
430 }
431 
432 /// getBufferData - Return a pointer to the start and end of the source buffer
433 /// data for the specified FileID.
434 std::pair<const char*, const char*>
435 SourceManager::getBufferData(FileID FID) const {
436   const llvm::MemoryBuffer *Buf = getBuffer(FID);
437   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
438 }
439 
440 
441 //===----------------------------------------------------------------------===//
442 // SourceLocation manipulation methods.
443 //===----------------------------------------------------------------------===//
444 
445 /// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
446 /// method that is used for all SourceManager queries that start with a
447 /// SourceLocation object.  It is responsible for finding the entry in
448 /// SLocEntryTable which contains the specified location.
449 ///
450 FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
451   assert(SLocOffset && "Invalid FileID");
452 
453   // After the first and second level caches, I see two common sorts of
454   // behavior: 1) a lot of searched FileID's are "near" the cached file location
455   // or are "near" the cached instantiation location.  2) others are just
456   // completely random and may be a very long way away.
457   //
458   // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
459   // then we fall back to a less cache efficient, but more scalable, binary
460   // search to find the location.
461 
462   // See if this is near the file point - worst case we start scanning from the
463   // most newly created FileID.
464   std::vector<SrcMgr::SLocEntry>::const_iterator I;
465 
466   if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
467     // Neither loc prunes our search.
468     I = SLocEntryTable.end();
469   } else {
470     // Perhaps it is near the file point.
471     I = SLocEntryTable.begin()+LastFileIDLookup.ID;
472   }
473 
474   // Find the FileID that contains this.  "I" is an iterator that points to a
475   // FileID whose offset is known to be larger than SLocOffset.
476   unsigned NumProbes = 0;
477   while (1) {
478     --I;
479     if (I->getOffset() <= SLocOffset) {
480 #if 0
481       printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
482              I-SLocEntryTable.begin(),
483              I->isInstantiation() ? "inst" : "file",
484              LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
485 #endif
486       FileID Res = FileID::get(I-SLocEntryTable.begin());
487 
488       // If this isn't an instantiation, remember it.  We have good locality
489       // across FileID lookups.
490       if (!I->isInstantiation())
491         LastFileIDLookup = Res;
492       NumLinearScans += NumProbes+1;
493       return Res;
494     }
495     if (++NumProbes == 8)
496       break;
497   }
498 
499   // Convert "I" back into an index.  We know that it is an entry whose index is
500   // larger than the offset we are looking for.
501   unsigned GreaterIndex = I-SLocEntryTable.begin();
502   // LessIndex - This is the lower bound of the range that we're searching.
503   // We know that the offset corresponding to the FileID is is less than
504   // SLocOffset.
505   unsigned LessIndex = 0;
506   NumProbes = 0;
507   while (1) {
508     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
509     unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
510 
511     ++NumProbes;
512 
513     // If the offset of the midpoint is too large, chop the high side of the
514     // range to the midpoint.
515     if (MidOffset > SLocOffset) {
516       GreaterIndex = MiddleIndex;
517       continue;
518     }
519 
520     // If the middle index contains the value, succeed and return.
521     if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
522 #if 0
523       printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
524              I-SLocEntryTable.begin(),
525              I->isInstantiation() ? "inst" : "file",
526              LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
527 #endif
528       FileID Res = FileID::get(MiddleIndex);
529 
530       // If this isn't an instantiation, remember it.  We have good locality
531       // across FileID lookups.
532       if (!I->isInstantiation())
533         LastFileIDLookup = Res;
534       NumBinaryProbes += NumProbes;
535       return Res;
536     }
537 
538     // Otherwise, move the low-side up to the middle index.
539     LessIndex = MiddleIndex;
540   }
541 }
542 
543 SourceLocation SourceManager::
544 getInstantiationLocSlowCase(SourceLocation Loc) const {
545   do {
546     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
547     Loc = getSLocEntry(LocInfo.first).getInstantiation()
548                    .getInstantiationLocStart();
549     Loc = Loc.getFileLocWithOffset(LocInfo.second);
550   } while (!Loc.isFileID());
551 
552   return Loc;
553 }
554 
555 SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
556   do {
557     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
558     Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
559     Loc = Loc.getFileLocWithOffset(LocInfo.second);
560   } while (!Loc.isFileID());
561   return Loc;
562 }
563 
564 
565 std::pair<FileID, unsigned>
566 SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
567                                                      unsigned Offset) const {
568   // If this is an instantiation record, walk through all the instantiation
569   // points.
570   FileID FID;
571   SourceLocation Loc;
572   do {
573     Loc = E->getInstantiation().getInstantiationLocStart();
574 
575     FID = getFileID(Loc);
576     E = &getSLocEntry(FID);
577     Offset += Loc.getOffset()-E->getOffset();
578   } while (!Loc.isFileID());
579 
580   return std::make_pair(FID, Offset);
581 }
582 
583 std::pair<FileID, unsigned>
584 SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
585                                                 unsigned Offset) const {
586   // If this is an instantiation record, walk through all the instantiation
587   // points.
588   FileID FID;
589   SourceLocation Loc;
590   do {
591     Loc = E->getInstantiation().getSpellingLoc();
592 
593     FID = getFileID(Loc);
594     E = &getSLocEntry(FID);
595     Offset += Loc.getOffset()-E->getOffset();
596   } while (!Loc.isFileID());
597 
598   return std::make_pair(FID, Offset);
599 }
600 
601 /// getImmediateSpellingLoc - Given a SourceLocation object, return the
602 /// spelling location referenced by the ID.  This is the first level down
603 /// towards the place where the characters that make up the lexed token can be
604 /// found.  This should not generally be used by clients.
605 SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
606   if (Loc.isFileID()) return Loc;
607   std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
608   Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
609   return Loc.getFileLocWithOffset(LocInfo.second);
610 }
611 
612 
613 /// getImmediateInstantiationRange - Loc is required to be an instantiation
614 /// location.  Return the start/end of the instantiation information.
615 std::pair<SourceLocation,SourceLocation>
616 SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
617   assert(Loc.isMacroID() && "Not an instantiation loc!");
618   const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
619   return II.getInstantiationLocRange();
620 }
621 
622 /// getInstantiationRange - Given a SourceLocation object, return the
623 /// range of tokens covered by the instantiation in the ultimate file.
624 std::pair<SourceLocation,SourceLocation>
625 SourceManager::getInstantiationRange(SourceLocation Loc) const {
626   if (Loc.isFileID()) return std::make_pair(Loc, Loc);
627 
628   std::pair<SourceLocation,SourceLocation> Res =
629     getImmediateInstantiationRange(Loc);
630 
631   // Fully resolve the start and end locations to their ultimate instantiation
632   // points.
633   while (!Res.first.isFileID())
634     Res.first = getImmediateInstantiationRange(Res.first).first;
635   while (!Res.second.isFileID())
636     Res.second = getImmediateInstantiationRange(Res.second).second;
637   return Res;
638 }
639 
640 
641 
642 //===----------------------------------------------------------------------===//
643 // Queries about the code at a SourceLocation.
644 //===----------------------------------------------------------------------===//
645 
646 /// getCharacterData - Return a pointer to the start of the specified location
647 /// in the appropriate MemoryBuffer.
648 const char *SourceManager::getCharacterData(SourceLocation SL) const {
649   // Note that this is a hot function in the getSpelling() path, which is
650   // heavily used by -E mode.
651   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
652 
653   // Note that calling 'getBuffer()' may lazily page in a source file.
654   return getSLocEntry(LocInfo.first).getFile().getContentCache()
655               ->getBuffer()->getBufferStart() + LocInfo.second;
656 }
657 
658 
659 /// getColumnNumber - Return the column # for the specified file position.
660 /// this is significantly cheaper to compute than the line number.
661 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
662   const char *Buf = getBuffer(FID)->getBufferStart();
663 
664   unsigned LineStart = FilePos;
665   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
666     --LineStart;
667   return FilePos-LineStart+1;
668 }
669 
670 unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
671   if (Loc.isInvalid()) return 0;
672   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
673   return getColumnNumber(LocInfo.first, LocInfo.second);
674 }
675 
676 unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
677   if (Loc.isInvalid()) return 0;
678   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
679   return getColumnNumber(LocInfo.first, LocInfo.second);
680 }
681 
682 
683 
684 static void ComputeLineNumbers(ContentCache* FI,
685                                llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
686 static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
687   // Note that calling 'getBuffer()' may lazily page in the file.
688   const MemoryBuffer *Buffer = FI->getBuffer();
689 
690   // Find the file offsets of all of the *physical* source lines.  This does
691   // not look at trigraphs, escaped newlines, or anything else tricky.
692   std::vector<unsigned> LineOffsets;
693 
694   // Line #1 starts at char 0.
695   LineOffsets.push_back(0);
696 
697   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
698   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
699   unsigned Offs = 0;
700   while (1) {
701     // Skip over the contents of the line.
702     // TODO: Vectorize this?  This is very performance sensitive for programs
703     // with lots of diagnostics and in -E mode.
704     const unsigned char *NextBuf = (const unsigned char *)Buf;
705     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
706       ++NextBuf;
707     Offs += NextBuf-Buf;
708     Buf = NextBuf;
709 
710     if (Buf[0] == '\n' || Buf[0] == '\r') {
711       // If this is \n\r or \r\n, skip both characters.
712       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
713         ++Offs, ++Buf;
714       ++Offs, ++Buf;
715       LineOffsets.push_back(Offs);
716     } else {
717       // Otherwise, this is a null.  If end of file, exit.
718       if (Buf == End) break;
719       // Otherwise, skip the null.
720       ++Offs, ++Buf;
721     }
722   }
723 
724   // Copy the offsets into the FileInfo structure.
725   FI->NumLines = LineOffsets.size();
726   FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
727   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
728 }
729 
730 /// getLineNumber - Given a SourceLocation, return the spelling line number
731 /// for the position indicated.  This requires building and caching a table of
732 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
733 /// about to emit a diagnostic.
734 unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
735   ContentCache *Content;
736   if (LastLineNoFileIDQuery == FID)
737     Content = LastLineNoContentCache;
738   else
739     Content = const_cast<ContentCache*>(getSLocEntry(FID)
740                                         .getFile().getContentCache());
741 
742   // If this is the first use of line information for this buffer, compute the
743   /// SourceLineCache for it on demand.
744   if (Content->SourceLineCache == 0)
745     ComputeLineNumbers(Content, ContentCacheAlloc);
746 
747   // Okay, we know we have a line number table.  Do a binary search to find the
748   // line number that this character position lands on.
749   unsigned *SourceLineCache = Content->SourceLineCache;
750   unsigned *SourceLineCacheStart = SourceLineCache;
751   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
752 
753   unsigned QueriedFilePos = FilePos+1;
754 
755   // If the previous query was to the same file, we know both the file pos from
756   // that query and the line number returned.  This allows us to narrow the
757   // search space from the entire file to something near the match.
758   if (LastLineNoFileIDQuery == FID) {
759     if (QueriedFilePos >= LastLineNoFilePos) {
760       SourceLineCache = SourceLineCache+LastLineNoResult-1;
761 
762       // The query is likely to be nearby the previous one.  Here we check to
763       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
764       // where big comment blocks and vertical whitespace eat up lines but
765       // contribute no tokens.
766       if (SourceLineCache+5 < SourceLineCacheEnd) {
767         if (SourceLineCache[5] > QueriedFilePos)
768           SourceLineCacheEnd = SourceLineCache+5;
769         else if (SourceLineCache+10 < SourceLineCacheEnd) {
770           if (SourceLineCache[10] > QueriedFilePos)
771             SourceLineCacheEnd = SourceLineCache+10;
772           else if (SourceLineCache+20 < SourceLineCacheEnd) {
773             if (SourceLineCache[20] > QueriedFilePos)
774               SourceLineCacheEnd = SourceLineCache+20;
775           }
776         }
777       }
778     } else {
779       SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
780     }
781   }
782 
783   // If the spread is large, do a "radix" test as our initial guess, based on
784   // the assumption that lines average to approximately the same length.
785   // NOTE: This is currently disabled, as it does not appear to be profitable in
786   // initial measurements.
787   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
788     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
789 
790     // Take a stab at guessing where it is.
791     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
792 
793     // Check for -10 and +10 lines.
794     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
795     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
796 
797     // If the computed lower bound is less than the query location, move it in.
798     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
799         SourceLineCacheStart[LowerBound] < QueriedFilePos)
800       SourceLineCache = SourceLineCacheStart+LowerBound;
801 
802     // If the computed upper bound is greater than the query location, move it.
803     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
804         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
805       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
806   }
807 
808   unsigned *Pos
809     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
810   unsigned LineNo = Pos-SourceLineCacheStart;
811 
812   LastLineNoFileIDQuery = FID;
813   LastLineNoContentCache = Content;
814   LastLineNoFilePos = QueriedFilePos;
815   LastLineNoResult = LineNo;
816   return LineNo;
817 }
818 
819 unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
820   if (Loc.isInvalid()) return 0;
821   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
822   return getLineNumber(LocInfo.first, LocInfo.second);
823 }
824 unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
825   if (Loc.isInvalid()) return 0;
826   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
827   return getLineNumber(LocInfo.first, LocInfo.second);
828 }
829 
830 /// getFileCharacteristic - return the file characteristic of the specified
831 /// source location, indicating whether this is a normal file, a system
832 /// header, or an "implicit extern C" system header.
833 ///
834 /// This state can be modified with flags on GNU linemarker directives like:
835 ///   # 4 "foo.h" 3
836 /// which changes all source locations in the current file after that to be
837 /// considered to be from a system header.
838 SrcMgr::CharacteristicKind
839 SourceManager::getFileCharacteristic(SourceLocation Loc) const {
840   assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
841   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
842   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
843 
844   // If there are no #line directives in this file, just return the whole-file
845   // state.
846   if (!FI.hasLineDirectives())
847     return FI.getFileCharacteristic();
848 
849   assert(LineTable && "Can't have linetable entries without a LineTable!");
850   // See if there is a #line directive before the location.
851   const LineEntry *Entry =
852     LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
853 
854   // If this is before the first line marker, use the file characteristic.
855   if (!Entry)
856     return FI.getFileCharacteristic();
857 
858   return Entry->FileKind;
859 }
860 
861 /// Return the filename or buffer identifier of the buffer the location is in.
862 /// Note that this name does not respect #line directives.  Use getPresumedLoc
863 /// for normal clients.
864 const char *SourceManager::getBufferName(SourceLocation Loc) const {
865   if (Loc.isInvalid()) return "<invalid loc>";
866 
867   return getBuffer(getFileID(Loc))->getBufferIdentifier();
868 }
869 
870 
871 /// getPresumedLoc - This method returns the "presumed" location of a
872 /// SourceLocation specifies.  A "presumed location" can be modified by #line
873 /// or GNU line marker directives.  This provides a view on the data that a
874 /// user should see in diagnostics, for example.
875 ///
876 /// Note that a presumed location is always given as the instantiation point
877 /// of an instantiation location, not at the spelling location.
878 PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
879   if (Loc.isInvalid()) return PresumedLoc();
880 
881   // Presumed locations are always for instantiation points.
882   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
883 
884   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
885   const SrcMgr::ContentCache *C = FI.getContentCache();
886 
887   // To get the source name, first consult the FileEntry (if one exists)
888   // before the MemBuffer as this will avoid unnecessarily paging in the
889   // MemBuffer.
890   const char *Filename =
891     C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
892   unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
893   unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
894   SourceLocation IncludeLoc = FI.getIncludeLoc();
895 
896   // If we have #line directives in this file, update and overwrite the physical
897   // location info if appropriate.
898   if (FI.hasLineDirectives()) {
899     assert(LineTable && "Can't have linetable entries without a LineTable!");
900     // See if there is a #line directive before this.  If so, get it.
901     if (const LineEntry *Entry =
902           LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
903       // If the LineEntry indicates a filename, use it.
904       if (Entry->FilenameID != -1)
905         Filename = LineTable->getFilename(Entry->FilenameID);
906 
907       // Use the line number specified by the LineEntry.  This line number may
908       // be multiple lines down from the line entry.  Add the difference in
909       // physical line numbers from the query point and the line marker to the
910       // total.
911       unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
912       LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
913 
914       // Note that column numbers are not molested by line markers.
915 
916       // Handle virtual #include manipulation.
917       if (Entry->IncludeOffset) {
918         IncludeLoc = getLocForStartOfFile(LocInfo.first);
919         IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
920       }
921     }
922   }
923 
924   return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
925 }
926 
927 //===----------------------------------------------------------------------===//
928 // Other miscellaneous methods.
929 //===----------------------------------------------------------------------===//
930 
931 
932 /// PrintStats - Print statistics to stderr.
933 ///
934 void SourceManager::PrintStats() const {
935   llvm::cerr << "\n*** Source Manager Stats:\n";
936   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
937              << " mem buffers mapped.\n";
938   llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
939              << NextOffset << "B of Sloc address space used.\n";
940 
941   unsigned NumLineNumsComputed = 0;
942   unsigned NumFileBytesMapped = 0;
943   for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
944     NumLineNumsComputed += I->second->SourceLineCache != 0;
945     NumFileBytesMapped  += I->second->getSizeBytesMapped();
946   }
947 
948   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
949              << NumLineNumsComputed << " files with line #'s computed.\n";
950   llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
951              << NumBinaryProbes << " binary.\n";
952 }
953 
954 //===----------------------------------------------------------------------===//
955 // Serialization.
956 //===----------------------------------------------------------------------===//
957 
958 void ContentCache::Emit(llvm::Serializer& S) const {
959   S.FlushRecord();
960   S.EmitPtr(this);
961 
962   if (Entry) {
963     llvm::sys::Path Fname(Buffer->getBufferIdentifier());
964 
965     if (Fname.isAbsolute())
966       S.EmitCStr(Fname.c_str());
967     else {
968       // Create an absolute path.
969       // FIXME: This will potentially contain ".." and "." in the path.
970       llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
971       path.appendComponent(Fname.c_str());
972       S.EmitCStr(path.c_str());
973     }
974   }
975   else {
976     const char* p = Buffer->getBufferStart();
977     const char* e = Buffer->getBufferEnd();
978 
979     S.EmitInt(e-p);
980 
981     for ( ; p != e; ++p)
982       S.EmitInt(*p);
983   }
984 
985   S.FlushRecord();
986 }
987 
988 void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
989                                        SourceManager& SMgr,
990                                        FileManager* FMgr,
991                                        std::vector<char>& Buf) {
992   if (FMgr) {
993     llvm::SerializedPtrID PtrID = D.ReadPtrID();
994     D.ReadCStr(Buf,false);
995 
996     // Create/fetch the FileEntry.
997     const char* start = &Buf[0];
998     const FileEntry* E = FMgr->getFile(start,start+Buf.size());
999 
1000     // FIXME: Ideally we want a lazy materialization of the ContentCache
1001     //  anyway, because we don't want to read in source files unless this
1002     //  is absolutely needed.
1003     if (!E)
1004       D.RegisterPtr(PtrID,NULL);
1005     else
1006       // Get the ContextCache object and register it with the deserializer.
1007       D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
1008     return;
1009   }
1010 
1011   // Register the ContextCache object with the deserializer.
1012   /* FIXME:
1013   ContentCache *Entry
1014   SMgr.MemBufferInfos.push_back(ContentCache());
1015    = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
1016   D.RegisterPtr(&Entry);
1017 
1018   // Create the buffer.
1019   unsigned Size = D.ReadInt();
1020   Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
1021 
1022   // Read the contents of the buffer.
1023   char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
1024   for (unsigned i = 0; i < Size ; ++i)
1025     p[i] = D.ReadInt();
1026    */
1027 }
1028 
1029 void SourceManager::Emit(llvm::Serializer& S) const {
1030   S.EnterBlock();
1031   S.EmitPtr(this);
1032   S.EmitInt(MainFileID.getOpaqueValue());
1033 
1034   // Emit: FileInfos.  Just emit the file name.
1035   S.EnterBlock();
1036 
1037   // FIXME: Emit FileInfos.
1038   //std::for_each(FileInfos.begin(), FileInfos.end(),
1039   //              S.MakeEmitter<ContentCache>());
1040 
1041   S.ExitBlock();
1042 
1043   // Emit: MemBufferInfos
1044   S.EnterBlock();
1045 
1046   /* FIXME: EMIT.
1047   std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
1048                 S.MakeEmitter<ContentCache>());
1049    */
1050 
1051   S.ExitBlock();
1052 
1053   // FIXME: Emit SLocEntryTable.
1054 
1055   S.ExitBlock();
1056 }
1057 
1058 SourceManager*
1059 SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
1060   SourceManager *M = new SourceManager();
1061   D.RegisterPtr(M);
1062 
1063   // Read: the FileID of the main source file of the translation unit.
1064   M->MainFileID = FileID::get(D.ReadInt());
1065 
1066   std::vector<char> Buf;
1067 
1068   /*{ // FIXME Read: FileInfos.
1069     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
1070     while (!D.FinishedBlock(BLoc))
1071     ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
1072   }*/
1073 
1074   { // Read: MemBufferInfos.
1075     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
1076     while (!D.FinishedBlock(BLoc))
1077     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
1078   }
1079 
1080   // FIXME: Read SLocEntryTable.
1081 
1082   return M;
1083 }
1084