xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 53ad6b94b0fdce48638434e9b280544e7f983285)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/SourceManagerInternals.h"
16 #include "clang/Basic/FileManager.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include "llvm/System/Path.h"
21 #include <algorithm>
22 using namespace clang;
23 using namespace SrcMgr;
24 using llvm::MemoryBuffer;
25 
26 //===----------------------------------------------------------------------===//
27 // SourceManager Helper Classes
28 //===----------------------------------------------------------------------===//
29 
30 ContentCache::~ContentCache() {
31   delete Buffer;
32 }
33 
34 /// getSizeBytesMapped - Returns the number of bytes actually mapped for
35 ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
36 ///  instantiated.
37 unsigned ContentCache::getSizeBytesMapped() const {
38   return Buffer ? Buffer->getBufferSize() : 0;
39 }
40 
41 /// getSize - Returns the size of the content encapsulated by this ContentCache.
42 ///  This can be the size of the source file or the size of an arbitrary
43 ///  scratch buffer.  If the ContentCache encapsulates a source file, that
44 ///  file is not lazily brought in from disk to satisfy this query.
45 unsigned ContentCache::getSize() const {
46   return Buffer ? Buffer->getBufferSize() : Entry->getSize();
47 }
48 
49 void ContentCache::replaceBuffer(const llvm::MemoryBuffer *B) {
50   if (B == Buffer)
51     return;
52 
53   delete Buffer;
54   Buffer = B;
55 }
56 
57 const llvm::MemoryBuffer *ContentCache::getBuffer(std::string *ErrorStr) const {
58   // Lazily create the Buffer for ContentCaches that wrap files.
59   if (!Buffer && Entry) {
60     // FIXME: Should we support a way to not have to do this check over
61     //   and over if we cannot open the file?
62     Buffer = MemoryBuffer::getFile(Entry->getName(), ErrorStr,Entry->getSize());
63   }
64   return Buffer;
65 }
66 
67 unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
68   // Look up the filename in the string table, returning the pre-existing value
69   // if it exists.
70   llvm::StringMapEntry<unsigned> &Entry =
71     FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
72   if (Entry.getValue() != ~0U)
73     return Entry.getValue();
74 
75   // Otherwise, assign this the next available ID.
76   Entry.setValue(FilenamesByID.size());
77   FilenamesByID.push_back(&Entry);
78   return FilenamesByID.size()-1;
79 }
80 
81 /// AddLineNote - Add a line note to the line table that indicates that there
82 /// is a #line at the specified FID/Offset location which changes the presumed
83 /// location to LineNo/FilenameID.
84 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
85                                 unsigned LineNo, int FilenameID) {
86   std::vector<LineEntry> &Entries = LineEntries[FID];
87 
88   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
89          "Adding line entries out of order!");
90 
91   SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
92   unsigned IncludeOffset = 0;
93 
94   if (!Entries.empty()) {
95     // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
96     // that we are still in "foo.h".
97     if (FilenameID == -1)
98       FilenameID = Entries.back().FilenameID;
99 
100     // If we are after a line marker that switched us to system header mode, or
101     // that set #include information, preserve it.
102     Kind = Entries.back().FileKind;
103     IncludeOffset = Entries.back().IncludeOffset;
104   }
105 
106   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
107                                    IncludeOffset));
108 }
109 
110 /// AddLineNote This is the same as the previous version of AddLineNote, but is
111 /// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
112 /// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
113 /// this is a file exit.  FileKind specifies whether this is a system header or
114 /// extern C system header.
115 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
116                                 unsigned LineNo, int FilenameID,
117                                 unsigned EntryExit,
118                                 SrcMgr::CharacteristicKind FileKind) {
119   assert(FilenameID != -1 && "Unspecified filename should use other accessor");
120 
121   std::vector<LineEntry> &Entries = LineEntries[FID];
122 
123   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
124          "Adding line entries out of order!");
125 
126   unsigned IncludeOffset = 0;
127   if (EntryExit == 0) {  // No #include stack change.
128     IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
129   } else if (EntryExit == 1) {
130     IncludeOffset = Offset-1;
131   } else if (EntryExit == 2) {
132     assert(!Entries.empty() && Entries.back().IncludeOffset &&
133        "PPDirectives should have caught case when popping empty include stack");
134 
135     // Get the include loc of the last entries' include loc as our include loc.
136     IncludeOffset = 0;
137     if (const LineEntry *PrevEntry =
138           FindNearestLineEntry(FID, Entries.back().IncludeOffset))
139       IncludeOffset = PrevEntry->IncludeOffset;
140   }
141 
142   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
143                                    IncludeOffset));
144 }
145 
146 
147 /// FindNearestLineEntry - Find the line entry nearest to FID that is before
148 /// it.  If there is no line entry before Offset in FID, return null.
149 const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
150                                                      unsigned Offset) {
151   const std::vector<LineEntry> &Entries = LineEntries[FID];
152   assert(!Entries.empty() && "No #line entries for this FID after all!");
153 
154   // It is very common for the query to be after the last #line, check this
155   // first.
156   if (Entries.back().FileOffset <= Offset)
157     return &Entries.back();
158 
159   // Do a binary search to find the maximal element that is still before Offset.
160   std::vector<LineEntry>::const_iterator I =
161     std::upper_bound(Entries.begin(), Entries.end(), Offset);
162   if (I == Entries.begin()) return 0;
163   return &*--I;
164 }
165 
166 /// \brief Add a new line entry that has already been encoded into
167 /// the internal representation of the line table.
168 void LineTableInfo::AddEntry(unsigned FID,
169                              const std::vector<LineEntry> &Entries) {
170   LineEntries[FID] = Entries;
171 }
172 
173 /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
174 ///
175 unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
176   if (LineTable == 0)
177     LineTable = new LineTableInfo();
178   return LineTable->getLineTableFilenameID(Ptr, Len);
179 }
180 
181 
182 /// AddLineNote - Add a line note to the line table for the FileID and offset
183 /// specified by Loc.  If FilenameID is -1, it is considered to be
184 /// unspecified.
185 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
186                                 int FilenameID) {
187   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
188 
189   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
190 
191   // Remember that this file has #line directives now if it doesn't already.
192   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
193 
194   if (LineTable == 0)
195     LineTable = new LineTableInfo();
196   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
197 }
198 
199 /// AddLineNote - Add a GNU line marker to the line table.
200 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
201                                 int FilenameID, bool IsFileEntry,
202                                 bool IsFileExit, bool IsSystemHeader,
203                                 bool IsExternCHeader) {
204   // If there is no filename and no flags, this is treated just like a #line,
205   // which does not change the flags of the previous line marker.
206   if (FilenameID == -1) {
207     assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
208            "Can't set flags without setting the filename!");
209     return AddLineNote(Loc, LineNo, FilenameID);
210   }
211 
212   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
213   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
214 
215   // Remember that this file has #line directives now if it doesn't already.
216   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
217 
218   if (LineTable == 0)
219     LineTable = new LineTableInfo();
220 
221   SrcMgr::CharacteristicKind FileKind;
222   if (IsExternCHeader)
223     FileKind = SrcMgr::C_ExternCSystem;
224   else if (IsSystemHeader)
225     FileKind = SrcMgr::C_System;
226   else
227     FileKind = SrcMgr::C_User;
228 
229   unsigned EntryExit = 0;
230   if (IsFileEntry)
231     EntryExit = 1;
232   else if (IsFileExit)
233     EntryExit = 2;
234 
235   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
236                          EntryExit, FileKind);
237 }
238 
239 LineTableInfo &SourceManager::getLineTable() {
240   if (LineTable == 0)
241     LineTable = new LineTableInfo();
242   return *LineTable;
243 }
244 
245 //===----------------------------------------------------------------------===//
246 // Private 'Create' methods.
247 //===----------------------------------------------------------------------===//
248 
249 SourceManager::~SourceManager() {
250   delete LineTable;
251 
252   // Delete FileEntry objects corresponding to content caches.  Since the actual
253   // content cache objects are bump pointer allocated, we just have to run the
254   // dtors, but we call the deallocate method for completeness.
255   for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
256     MemBufferInfos[i]->~ContentCache();
257     ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
258   }
259   for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
260        I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
261     I->second->~ContentCache();
262     ContentCacheAlloc.Deallocate(I->second);
263   }
264 }
265 
266 void SourceManager::clearIDTables() {
267   MainFileID = FileID();
268   SLocEntryTable.clear();
269   LastLineNoFileIDQuery = FileID();
270   LastLineNoContentCache = 0;
271   LastFileIDLookup = FileID();
272 
273   if (LineTable)
274     LineTable->clear();
275 
276   // Use up FileID #0 as an invalid instantiation.
277   NextOffset = 0;
278   createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
279 }
280 
281 /// getOrCreateContentCache - Create or return a cached ContentCache for the
282 /// specified file.
283 const ContentCache *
284 SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
285   assert(FileEnt && "Didn't specify a file entry to use?");
286 
287   // Do we already have information about this file?
288   ContentCache *&Entry = FileInfos[FileEnt];
289   if (Entry) return Entry;
290 
291   // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
292   // so that FileInfo can use the low 3 bits of the pointer for its own
293   // nefarious purposes.
294   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
295   EntryAlign = std::max(8U, EntryAlign);
296   Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
297   new (Entry) ContentCache(FileEnt);
298   return Entry;
299 }
300 
301 
302 /// createMemBufferContentCache - Create a new ContentCache for the specified
303 ///  memory buffer.  This does no caching.
304 const ContentCache*
305 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
306   // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
307   // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
308   // the pointer for its own nefarious purposes.
309   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
310   EntryAlign = std::max(8U, EntryAlign);
311   ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
312   new (Entry) ContentCache();
313   MemBufferInfos.push_back(Entry);
314   Entry->setBuffer(Buffer);
315   return Entry;
316 }
317 
318 void SourceManager::PreallocateSLocEntries(ExternalSLocEntrySource *Source,
319                                            unsigned NumSLocEntries,
320                                            unsigned NextOffset) {
321   ExternalSLocEntries = Source;
322   this->NextOffset = NextOffset;
323   SLocEntryLoaded.resize(NumSLocEntries + 1);
324   SLocEntryLoaded[0] = true;
325   SLocEntryTable.resize(SLocEntryTable.size() + NumSLocEntries);
326 }
327 
328 void SourceManager::ClearPreallocatedSLocEntries() {
329   unsigned I = 0;
330   for (unsigned N = SLocEntryLoaded.size(); I != N; ++I)
331     if (!SLocEntryLoaded[I])
332       break;
333 
334   // We've already loaded all preallocated source location entries.
335   if (I == SLocEntryLoaded.size())
336     return;
337 
338   // Remove everything from location I onward.
339   SLocEntryTable.resize(I);
340   SLocEntryLoaded.clear();
341   ExternalSLocEntries = 0;
342 }
343 
344 
345 //===----------------------------------------------------------------------===//
346 // Methods to create new FileID's and instantiations.
347 //===----------------------------------------------------------------------===//
348 
349 /// createFileID - Create a new fileID for the specified ContentCache and
350 /// include position.  This works regardless of whether the ContentCache
351 /// corresponds to a file or some other input source.
352 FileID SourceManager::createFileID(const ContentCache *File,
353                                    SourceLocation IncludePos,
354                                    SrcMgr::CharacteristicKind FileCharacter,
355                                    unsigned PreallocatedID,
356                                    unsigned Offset) {
357   if (PreallocatedID) {
358     // If we're filling in a preallocated ID, just load in the file
359     // entry and return.
360     assert(PreallocatedID < SLocEntryLoaded.size() &&
361            "Preallocate ID out-of-range");
362     assert(!SLocEntryLoaded[PreallocatedID] &&
363            "Source location entry already loaded");
364     assert(Offset && "Preallocate source location cannot have zero offset");
365     SLocEntryTable[PreallocatedID]
366       = SLocEntry::get(Offset, FileInfo::get(IncludePos, File, FileCharacter));
367     SLocEntryLoaded[PreallocatedID] = true;
368     FileID FID = FileID::get(PreallocatedID);
369     return LastFileIDLookup = FID;
370   }
371 
372   SLocEntryTable.push_back(SLocEntry::get(NextOffset,
373                                           FileInfo::get(IncludePos, File,
374                                                         FileCharacter)));
375   unsigned FileSize = File->getSize();
376   assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
377   NextOffset += FileSize+1;
378 
379   // Set LastFileIDLookup to the newly created file.  The next getFileID call is
380   // almost guaranteed to be from that file.
381   FileID FID = FileID::get(SLocEntryTable.size()-1);
382   return LastFileIDLookup = FID;
383 }
384 
385 /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
386 /// that a token from SpellingLoc should actually be referenced from
387 /// InstantiationLoc.
388 SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
389                                                      SourceLocation ILocStart,
390                                                      SourceLocation ILocEnd,
391                                                      unsigned TokLength,
392                                                      unsigned PreallocatedID,
393                                                      unsigned Offset) {
394   InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
395   if (PreallocatedID) {
396     // If we're filling in a preallocated ID, just load in the
397     // instantiation entry and return.
398     assert(PreallocatedID < SLocEntryLoaded.size() &&
399            "Preallocate ID out-of-range");
400     assert(!SLocEntryLoaded[PreallocatedID] &&
401            "Source location entry already loaded");
402     assert(Offset && "Preallocate source location cannot have zero offset");
403     SLocEntryTable[PreallocatedID] = SLocEntry::get(Offset, II);
404     SLocEntryLoaded[PreallocatedID] = true;
405     return SourceLocation::getMacroLoc(Offset);
406   }
407   SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
408   assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
409   NextOffset += TokLength+1;
410   return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
411 }
412 
413 const llvm::MemoryBuffer *
414 SourceManager::getMemoryBufferForFile(const FileEntry *File) {
415   const SrcMgr::ContentCache *IR = getOrCreateContentCache(File);
416   if (IR == 0)
417     return 0;
418 
419   return IR->getBuffer();
420 }
421 
422 bool SourceManager::overrideFileContents(const FileEntry *SourceFile,
423                                          const llvm::MemoryBuffer *Buffer) {
424   const SrcMgr::ContentCache *IR = getOrCreateContentCache(SourceFile);
425   if (IR == 0)
426     return true;
427 
428   const_cast<SrcMgr::ContentCache *>(IR)->replaceBuffer(Buffer);
429   return false;
430 }
431 
432 /// getBufferData - Return a pointer to the start and end of the source buffer
433 /// data for the specified FileID.
434 std::pair<const char*, const char*>
435 SourceManager::getBufferData(FileID FID) const {
436   const llvm::MemoryBuffer *Buf = getBuffer(FID);
437   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
438 }
439 
440 
441 //===----------------------------------------------------------------------===//
442 // SourceLocation manipulation methods.
443 //===----------------------------------------------------------------------===//
444 
445 /// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
446 /// method that is used for all SourceManager queries that start with a
447 /// SourceLocation object.  It is responsible for finding the entry in
448 /// SLocEntryTable which contains the specified location.
449 ///
450 FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
451   assert(SLocOffset && "Invalid FileID");
452 
453   // After the first and second level caches, I see two common sorts of
454   // behavior: 1) a lot of searched FileID's are "near" the cached file location
455   // or are "near" the cached instantiation location.  2) others are just
456   // completely random and may be a very long way away.
457   //
458   // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
459   // then we fall back to a less cache efficient, but more scalable, binary
460   // search to find the location.
461 
462   // See if this is near the file point - worst case we start scanning from the
463   // most newly created FileID.
464   std::vector<SrcMgr::SLocEntry>::const_iterator I;
465 
466   if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
467     // Neither loc prunes our search.
468     I = SLocEntryTable.end();
469   } else {
470     // Perhaps it is near the file point.
471     I = SLocEntryTable.begin()+LastFileIDLookup.ID;
472   }
473 
474   // Find the FileID that contains this.  "I" is an iterator that points to a
475   // FileID whose offset is known to be larger than SLocOffset.
476   unsigned NumProbes = 0;
477   while (1) {
478     --I;
479     if (ExternalSLocEntries)
480       getSLocEntry(FileID::get(I - SLocEntryTable.begin()));
481     if (I->getOffset() <= SLocOffset) {
482 #if 0
483       printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
484              I-SLocEntryTable.begin(),
485              I->isInstantiation() ? "inst" : "file",
486              LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
487 #endif
488       FileID Res = FileID::get(I-SLocEntryTable.begin());
489 
490       // If this isn't an instantiation, remember it.  We have good locality
491       // across FileID lookups.
492       if (!I->isInstantiation())
493         LastFileIDLookup = Res;
494       NumLinearScans += NumProbes+1;
495       return Res;
496     }
497     if (++NumProbes == 8)
498       break;
499   }
500 
501   // Convert "I" back into an index.  We know that it is an entry whose index is
502   // larger than the offset we are looking for.
503   unsigned GreaterIndex = I-SLocEntryTable.begin();
504   // LessIndex - This is the lower bound of the range that we're searching.
505   // We know that the offset corresponding to the FileID is is less than
506   // SLocOffset.
507   unsigned LessIndex = 0;
508   NumProbes = 0;
509   while (1) {
510     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
511     unsigned MidOffset = getSLocEntry(FileID::get(MiddleIndex)).getOffset();
512 
513     ++NumProbes;
514 
515     // If the offset of the midpoint is too large, chop the high side of the
516     // range to the midpoint.
517     if (MidOffset > SLocOffset) {
518       GreaterIndex = MiddleIndex;
519       continue;
520     }
521 
522     // If the middle index contains the value, succeed and return.
523     if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
524 #if 0
525       printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
526              I-SLocEntryTable.begin(),
527              I->isInstantiation() ? "inst" : "file",
528              LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
529 #endif
530       FileID Res = FileID::get(MiddleIndex);
531 
532       // If this isn't an instantiation, remember it.  We have good locality
533       // across FileID lookups.
534       if (!I->isInstantiation())
535         LastFileIDLookup = Res;
536       NumBinaryProbes += NumProbes;
537       return Res;
538     }
539 
540     // Otherwise, move the low-side up to the middle index.
541     LessIndex = MiddleIndex;
542   }
543 }
544 
545 SourceLocation SourceManager::
546 getInstantiationLocSlowCase(SourceLocation Loc) const {
547   do {
548     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
549     Loc = getSLocEntry(LocInfo.first).getInstantiation()
550                    .getInstantiationLocStart();
551     Loc = Loc.getFileLocWithOffset(LocInfo.second);
552   } while (!Loc.isFileID());
553 
554   return Loc;
555 }
556 
557 SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
558   do {
559     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
560     Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
561     Loc = Loc.getFileLocWithOffset(LocInfo.second);
562   } while (!Loc.isFileID());
563   return Loc;
564 }
565 
566 
567 std::pair<FileID, unsigned>
568 SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
569                                                      unsigned Offset) const {
570   // If this is an instantiation record, walk through all the instantiation
571   // points.
572   FileID FID;
573   SourceLocation Loc;
574   do {
575     Loc = E->getInstantiation().getInstantiationLocStart();
576 
577     FID = getFileID(Loc);
578     E = &getSLocEntry(FID);
579     Offset += Loc.getOffset()-E->getOffset();
580   } while (!Loc.isFileID());
581 
582   return std::make_pair(FID, Offset);
583 }
584 
585 std::pair<FileID, unsigned>
586 SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
587                                                 unsigned Offset) const {
588   // If this is an instantiation record, walk through all the instantiation
589   // points.
590   FileID FID;
591   SourceLocation Loc;
592   do {
593     Loc = E->getInstantiation().getSpellingLoc();
594 
595     FID = getFileID(Loc);
596     E = &getSLocEntry(FID);
597     Offset += Loc.getOffset()-E->getOffset();
598   } while (!Loc.isFileID());
599 
600   return std::make_pair(FID, Offset);
601 }
602 
603 /// getImmediateSpellingLoc - Given a SourceLocation object, return the
604 /// spelling location referenced by the ID.  This is the first level down
605 /// towards the place where the characters that make up the lexed token can be
606 /// found.  This should not generally be used by clients.
607 SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
608   if (Loc.isFileID()) return Loc;
609   std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
610   Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
611   return Loc.getFileLocWithOffset(LocInfo.second);
612 }
613 
614 
615 /// getImmediateInstantiationRange - Loc is required to be an instantiation
616 /// location.  Return the start/end of the instantiation information.
617 std::pair<SourceLocation,SourceLocation>
618 SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
619   assert(Loc.isMacroID() && "Not an instantiation loc!");
620   const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
621   return II.getInstantiationLocRange();
622 }
623 
624 /// getInstantiationRange - Given a SourceLocation object, return the
625 /// range of tokens covered by the instantiation in the ultimate file.
626 std::pair<SourceLocation,SourceLocation>
627 SourceManager::getInstantiationRange(SourceLocation Loc) const {
628   if (Loc.isFileID()) return std::make_pair(Loc, Loc);
629 
630   std::pair<SourceLocation,SourceLocation> Res =
631     getImmediateInstantiationRange(Loc);
632 
633   // Fully resolve the start and end locations to their ultimate instantiation
634   // points.
635   while (!Res.first.isFileID())
636     Res.first = getImmediateInstantiationRange(Res.first).first;
637   while (!Res.second.isFileID())
638     Res.second = getImmediateInstantiationRange(Res.second).second;
639   return Res;
640 }
641 
642 
643 
644 //===----------------------------------------------------------------------===//
645 // Queries about the code at a SourceLocation.
646 //===----------------------------------------------------------------------===//
647 
648 /// getCharacterData - Return a pointer to the start of the specified location
649 /// in the appropriate MemoryBuffer.
650 const char *SourceManager::getCharacterData(SourceLocation SL) const {
651   // Note that this is a hot function in the getSpelling() path, which is
652   // heavily used by -E mode.
653   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
654 
655   // Note that calling 'getBuffer()' may lazily page in a source file.
656   return getSLocEntry(LocInfo.first).getFile().getContentCache()
657               ->getBuffer()->getBufferStart() + LocInfo.second;
658 }
659 
660 
661 /// getColumnNumber - Return the column # for the specified file position.
662 /// this is significantly cheaper to compute than the line number.
663 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
664   const char *Buf = getBuffer(FID)->getBufferStart();
665 
666   unsigned LineStart = FilePos;
667   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
668     --LineStart;
669   return FilePos-LineStart+1;
670 }
671 
672 unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
673   if (Loc.isInvalid()) return 0;
674   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
675   return getColumnNumber(LocInfo.first, LocInfo.second);
676 }
677 
678 unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
679   if (Loc.isInvalid()) return 0;
680   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
681   return getColumnNumber(LocInfo.first, LocInfo.second);
682 }
683 
684 
685 
686 static DISABLE_INLINE void ComputeLineNumbers(ContentCache* FI,
687                                               llvm::BumpPtrAllocator &Alloc);
688 static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
689   // Note that calling 'getBuffer()' may lazily page in the file.
690   const MemoryBuffer *Buffer = FI->getBuffer();
691 
692   // Find the file offsets of all of the *physical* source lines.  This does
693   // not look at trigraphs, escaped newlines, or anything else tricky.
694   std::vector<unsigned> LineOffsets;
695 
696   // Line #1 starts at char 0.
697   LineOffsets.push_back(0);
698 
699   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
700   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
701   unsigned Offs = 0;
702   while (1) {
703     // Skip over the contents of the line.
704     // TODO: Vectorize this?  This is very performance sensitive for programs
705     // with lots of diagnostics and in -E mode.
706     const unsigned char *NextBuf = (const unsigned char *)Buf;
707     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
708       ++NextBuf;
709     Offs += NextBuf-Buf;
710     Buf = NextBuf;
711 
712     if (Buf[0] == '\n' || Buf[0] == '\r') {
713       // If this is \n\r or \r\n, skip both characters.
714       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
715         ++Offs, ++Buf;
716       ++Offs, ++Buf;
717       LineOffsets.push_back(Offs);
718     } else {
719       // Otherwise, this is a null.  If end of file, exit.
720       if (Buf == End) break;
721       // Otherwise, skip the null.
722       ++Offs, ++Buf;
723     }
724   }
725 
726   // Copy the offsets into the FileInfo structure.
727   FI->NumLines = LineOffsets.size();
728   FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
729   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
730 }
731 
732 /// getLineNumber - Given a SourceLocation, return the spelling line number
733 /// for the position indicated.  This requires building and caching a table of
734 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
735 /// about to emit a diagnostic.
736 unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
737   ContentCache *Content;
738   if (LastLineNoFileIDQuery == FID)
739     Content = LastLineNoContentCache;
740   else
741     Content = const_cast<ContentCache*>(getSLocEntry(FID)
742                                         .getFile().getContentCache());
743 
744   // If this is the first use of line information for this buffer, compute the
745   /// SourceLineCache for it on demand.
746   if (Content->SourceLineCache == 0)
747     ComputeLineNumbers(Content, ContentCacheAlloc);
748 
749   // Okay, we know we have a line number table.  Do a binary search to find the
750   // line number that this character position lands on.
751   unsigned *SourceLineCache = Content->SourceLineCache;
752   unsigned *SourceLineCacheStart = SourceLineCache;
753   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
754 
755   unsigned QueriedFilePos = FilePos+1;
756 
757   // FIXME: I would like to be convinced that this code is worth being as
758   // complicated as it is, binary search isn't that slow.
759   //
760   // If it is worth being optimized, then in my opinion it could be more
761   // performant, simpler, and more obviously correct by just "galloping" outward
762   // from the queried file position. In fact, this could be incorporated into a
763   // generic algorithm such as lower_bound_with_hint.
764   //
765   // If someone gives me a test case where this matters, and I will do it! - DWD
766 
767   // If the previous query was to the same file, we know both the file pos from
768   // that query and the line number returned.  This allows us to narrow the
769   // search space from the entire file to something near the match.
770   if (LastLineNoFileIDQuery == FID) {
771     if (QueriedFilePos >= LastLineNoFilePos) {
772       // FIXME: Potential overflow?
773       SourceLineCache = SourceLineCache+LastLineNoResult-1;
774 
775       // The query is likely to be nearby the previous one.  Here we check to
776       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
777       // where big comment blocks and vertical whitespace eat up lines but
778       // contribute no tokens.
779       if (SourceLineCache+5 < SourceLineCacheEnd) {
780         if (SourceLineCache[5] > QueriedFilePos)
781           SourceLineCacheEnd = SourceLineCache+5;
782         else if (SourceLineCache+10 < SourceLineCacheEnd) {
783           if (SourceLineCache[10] > QueriedFilePos)
784             SourceLineCacheEnd = SourceLineCache+10;
785           else if (SourceLineCache+20 < SourceLineCacheEnd) {
786             if (SourceLineCache[20] > QueriedFilePos)
787               SourceLineCacheEnd = SourceLineCache+20;
788           }
789         }
790       }
791     } else {
792       if (LastLineNoResult < Content->NumLines)
793         SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
794     }
795   }
796 
797   // If the spread is large, do a "radix" test as our initial guess, based on
798   // the assumption that lines average to approximately the same length.
799   // NOTE: This is currently disabled, as it does not appear to be profitable in
800   // initial measurements.
801   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
802     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
803 
804     // Take a stab at guessing where it is.
805     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
806 
807     // Check for -10 and +10 lines.
808     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
809     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
810 
811     // If the computed lower bound is less than the query location, move it in.
812     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
813         SourceLineCacheStart[LowerBound] < QueriedFilePos)
814       SourceLineCache = SourceLineCacheStart+LowerBound;
815 
816     // If the computed upper bound is greater than the query location, move it.
817     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
818         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
819       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
820   }
821 
822   unsigned *Pos
823     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
824   unsigned LineNo = Pos-SourceLineCacheStart;
825 
826   LastLineNoFileIDQuery = FID;
827   LastLineNoContentCache = Content;
828   LastLineNoFilePos = QueriedFilePos;
829   LastLineNoResult = LineNo;
830   return LineNo;
831 }
832 
833 unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
834   if (Loc.isInvalid()) return 0;
835   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
836   return getLineNumber(LocInfo.first, LocInfo.second);
837 }
838 unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
839   if (Loc.isInvalid()) return 0;
840   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
841   return getLineNumber(LocInfo.first, LocInfo.second);
842 }
843 
844 /// getFileCharacteristic - return the file characteristic of the specified
845 /// source location, indicating whether this is a normal file, a system
846 /// header, or an "implicit extern C" system header.
847 ///
848 /// This state can be modified with flags on GNU linemarker directives like:
849 ///   # 4 "foo.h" 3
850 /// which changes all source locations in the current file after that to be
851 /// considered to be from a system header.
852 SrcMgr::CharacteristicKind
853 SourceManager::getFileCharacteristic(SourceLocation Loc) const {
854   assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
855   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
856   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
857 
858   // If there are no #line directives in this file, just return the whole-file
859   // state.
860   if (!FI.hasLineDirectives())
861     return FI.getFileCharacteristic();
862 
863   assert(LineTable && "Can't have linetable entries without a LineTable!");
864   // See if there is a #line directive before the location.
865   const LineEntry *Entry =
866     LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
867 
868   // If this is before the first line marker, use the file characteristic.
869   if (!Entry)
870     return FI.getFileCharacteristic();
871 
872   return Entry->FileKind;
873 }
874 
875 /// Return the filename or buffer identifier of the buffer the location is in.
876 /// Note that this name does not respect #line directives.  Use getPresumedLoc
877 /// for normal clients.
878 const char *SourceManager::getBufferName(SourceLocation Loc) const {
879   if (Loc.isInvalid()) return "<invalid loc>";
880 
881   return getBuffer(getFileID(Loc))->getBufferIdentifier();
882 }
883 
884 
885 /// getPresumedLoc - This method returns the "presumed" location of a
886 /// SourceLocation specifies.  A "presumed location" can be modified by #line
887 /// or GNU line marker directives.  This provides a view on the data that a
888 /// user should see in diagnostics, for example.
889 ///
890 /// Note that a presumed location is always given as the instantiation point
891 /// of an instantiation location, not at the spelling location.
892 PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
893   if (Loc.isInvalid()) return PresumedLoc();
894 
895   // Presumed locations are always for instantiation points.
896   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
897 
898   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
899   const SrcMgr::ContentCache *C = FI.getContentCache();
900 
901   // To get the source name, first consult the FileEntry (if one exists)
902   // before the MemBuffer as this will avoid unnecessarily paging in the
903   // MemBuffer.
904   const char *Filename =
905     C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
906   unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
907   unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
908   SourceLocation IncludeLoc = FI.getIncludeLoc();
909 
910   // If we have #line directives in this file, update and overwrite the physical
911   // location info if appropriate.
912   if (FI.hasLineDirectives()) {
913     assert(LineTable && "Can't have linetable entries without a LineTable!");
914     // See if there is a #line directive before this.  If so, get it.
915     if (const LineEntry *Entry =
916           LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
917       // If the LineEntry indicates a filename, use it.
918       if (Entry->FilenameID != -1)
919         Filename = LineTable->getFilename(Entry->FilenameID);
920 
921       // Use the line number specified by the LineEntry.  This line number may
922       // be multiple lines down from the line entry.  Add the difference in
923       // physical line numbers from the query point and the line marker to the
924       // total.
925       unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
926       LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
927 
928       // Note that column numbers are not molested by line markers.
929 
930       // Handle virtual #include manipulation.
931       if (Entry->IncludeOffset) {
932         IncludeLoc = getLocForStartOfFile(LocInfo.first);
933         IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
934       }
935     }
936   }
937 
938   return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
939 }
940 
941 //===----------------------------------------------------------------------===//
942 // Other miscellaneous methods.
943 //===----------------------------------------------------------------------===//
944 
945 /// \brief Get the source location for the given file:line:col triplet.
946 ///
947 /// If the source file is included multiple times, the source location will
948 /// be based upon the first inclusion.
949 SourceLocation SourceManager::getLocation(const FileEntry *SourceFile,
950                                           unsigned Line, unsigned Col) const {
951   assert(SourceFile && "Null source file!");
952   assert(Line && Col && "Line and column should start from 1!");
953 
954   fileinfo_iterator FI = FileInfos.find(SourceFile);
955   if (FI == FileInfos.end())
956     return SourceLocation();
957   ContentCache *Content = FI->second;
958 
959   // If this is the first use of line information for this buffer, compute the
960   /// SourceLineCache for it on demand.
961   if (Content->SourceLineCache == 0)
962     ComputeLineNumbers(Content, ContentCacheAlloc);
963 
964   if (Line > Content->NumLines)
965     return SourceLocation();
966 
967   unsigned FilePos = Content->SourceLineCache[Line - 1];
968   const char *Buf = Content->getBuffer()->getBufferStart() + FilePos;
969   unsigned BufLength = Content->getBuffer()->getBufferEnd() - Buf;
970   unsigned i = 0;
971 
972   // Check that the given column is valid.
973   while (i < BufLength-1 && i < Col-1 && Buf[i] != '\n' && Buf[i] != '\r')
974     ++i;
975   if (i < Col-1)
976     return SourceLocation();
977 
978   // Find the first file ID that corresponds to the given file.
979   FileID FirstFID;
980 
981   // First, check the main file ID, since it is common to look for a
982   // location in the main file.
983   if (!MainFileID.isInvalid()) {
984     const SLocEntry &MainSLoc = getSLocEntry(MainFileID);
985     if (MainSLoc.isFile() && MainSLoc.getFile().getContentCache() == Content)
986       FirstFID = MainFileID;
987   }
988 
989   if (FirstFID.isInvalid()) {
990     // The location we're looking for isn't in the main file; look
991     // through all of the source locations.
992     for (unsigned I = 0, N = sloc_entry_size(); I != N; ++I) {
993       const SLocEntry &SLoc = getSLocEntry(I);
994       if (SLoc.isFile() && SLoc.getFile().getContentCache() == Content) {
995         FirstFID = FileID::get(I);
996         break;
997       }
998     }
999   }
1000 
1001   if (FirstFID.isInvalid())
1002     return SourceLocation();
1003 
1004   return getLocForStartOfFile(FirstFID).getFileLocWithOffset(FilePos + Col - 1);
1005 }
1006 
1007 /// \brief Determines the order of 2 source locations in the translation unit.
1008 ///
1009 /// \returns true if LHS source location comes before RHS, false otherwise.
1010 bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
1011                                               SourceLocation RHS) const {
1012   assert(LHS.isValid() && RHS.isValid() && "Passed invalid source location!");
1013   if (LHS == RHS)
1014     return false;
1015 
1016   std::pair<FileID, unsigned> LOffs = getDecomposedLoc(LHS);
1017   std::pair<FileID, unsigned> ROffs = getDecomposedLoc(RHS);
1018 
1019   // If the source locations are in the same file, just compare offsets.
1020   if (LOffs.first == ROffs.first)
1021     return LOffs.second < ROffs.second;
1022 
1023   // If we are comparing a source location with multiple locations in the same
1024   // file, we get a big win by caching the result.
1025 
1026   if (LastLFIDForBeforeTUCheck == LOffs.first &&
1027       LastRFIDForBeforeTUCheck == ROffs.first)
1028     return LastResForBeforeTUCheck;
1029 
1030   LastLFIDForBeforeTUCheck = LOffs.first;
1031   LastRFIDForBeforeTUCheck = ROffs.first;
1032 
1033   // "Traverse" the include/instantiation stacks of both locations and try to
1034   // find a common "ancestor".
1035   //
1036   // First we traverse the stack of the right location and check each level
1037   // against the level of the left location, while collecting all levels in a
1038   // "stack map".
1039 
1040   std::map<FileID, unsigned> ROffsMap;
1041   ROffsMap[ROffs.first] = ROffs.second;
1042 
1043   while (1) {
1044     SourceLocation UpperLoc;
1045     const SrcMgr::SLocEntry &Entry = getSLocEntry(ROffs.first);
1046     if (Entry.isInstantiation())
1047       UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1048     else
1049       UpperLoc = Entry.getFile().getIncludeLoc();
1050 
1051     if (UpperLoc.isInvalid())
1052       break; // We reached the top.
1053 
1054     ROffs = getDecomposedLoc(UpperLoc);
1055 
1056     if (LOffs.first == ROffs.first)
1057       return LastResForBeforeTUCheck = LOffs.second < ROffs.second;
1058 
1059     ROffsMap[ROffs.first] = ROffs.second;
1060   }
1061 
1062   // We didn't find a common ancestor. Now traverse the stack of the left
1063   // location, checking against the stack map of the right location.
1064 
1065   while (1) {
1066     SourceLocation UpperLoc;
1067     const SrcMgr::SLocEntry &Entry = getSLocEntry(LOffs.first);
1068     if (Entry.isInstantiation())
1069       UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1070     else
1071       UpperLoc = Entry.getFile().getIncludeLoc();
1072 
1073     if (UpperLoc.isInvalid())
1074       break; // We reached the top.
1075 
1076     LOffs = getDecomposedLoc(UpperLoc);
1077 
1078     std::map<FileID, unsigned>::iterator I = ROffsMap.find(LOffs.first);
1079     if (I != ROffsMap.end())
1080       return LastResForBeforeTUCheck = LOffs.second < I->second;
1081   }
1082 
1083   // There is no common ancestor, most probably because one location is in the
1084   // predefines buffer.
1085   //
1086   // FIXME: We should rearrange the external interface so this simply never
1087   // happens; it can't conceptually happen. Also see PR5662.
1088 
1089   // If exactly one location is a memory buffer, assume it preceeds the other.
1090   bool LIsMB = !getSLocEntry(LOffs.first).getFile().getContentCache()->Entry;
1091   bool RIsMB = !getSLocEntry(ROffs.first).getFile().getContentCache()->Entry;
1092   if (LIsMB != RIsMB)
1093     return LastResForBeforeTUCheck = LIsMB;
1094 
1095   // Otherwise, just assume FileIDs were created in order.
1096   return LastResForBeforeTUCheck = (LOffs.first < ROffs.first);
1097 }
1098 
1099 /// PrintStats - Print statistics to stderr.
1100 ///
1101 void SourceManager::PrintStats() const {
1102   llvm::errs() << "\n*** Source Manager Stats:\n";
1103   llvm::errs() << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
1104                << " mem buffers mapped.\n";
1105   llvm::errs() << SLocEntryTable.size() << " SLocEntry's allocated, "
1106                << NextOffset << "B of Sloc address space used.\n";
1107 
1108   unsigned NumLineNumsComputed = 0;
1109   unsigned NumFileBytesMapped = 0;
1110   for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
1111     NumLineNumsComputed += I->second->SourceLineCache != 0;
1112     NumFileBytesMapped  += I->second->getSizeBytesMapped();
1113   }
1114 
1115   llvm::errs() << NumFileBytesMapped << " bytes of files mapped, "
1116                << NumLineNumsComputed << " files with line #'s computed.\n";
1117   llvm::errs() << "FileID scans: " << NumLinearScans << " linear, "
1118                << NumBinaryProbes << " binary.\n";
1119 }
1120 
1121 ExternalSLocEntrySource::~ExternalSLocEntrySource() { }
1122