xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 3441b4f77ed51d4a5a5bf172e370afa4ac048244)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/SourceManagerInternals.h"
16 #include "clang/Basic/FileManager.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/Support/raw_ostream.h"
20 #include "llvm/System/Path.h"
21 #include <algorithm>
22 using namespace clang;
23 using namespace SrcMgr;
24 using llvm::MemoryBuffer;
25 
26 //===----------------------------------------------------------------------===//
27 // SourceManager Helper Classes
28 //===----------------------------------------------------------------------===//
29 
30 ContentCache::~ContentCache() {
31   delete Buffer;
32 }
33 
34 /// getSizeBytesMapped - Returns the number of bytes actually mapped for
35 ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
36 ///  instantiated.
37 unsigned ContentCache::getSizeBytesMapped() const {
38   return Buffer ? Buffer->getBufferSize() : 0;
39 }
40 
41 /// getSize - Returns the size of the content encapsulated by this ContentCache.
42 ///  This can be the size of the source file or the size of an arbitrary
43 ///  scratch buffer.  If the ContentCache encapsulates a source file, that
44 ///  file is not lazily brought in from disk to satisfy this query.
45 unsigned ContentCache::getSize() const {
46   return Entry ? Entry->getSize() : Buffer->getBufferSize();
47 }
48 
49 const llvm::MemoryBuffer *ContentCache::getBuffer() const {
50   // Lazily create the Buffer for ContentCaches that wrap files.
51   if (!Buffer && Entry) {
52     // FIXME: Should we support a way to not have to do this check over
53     //   and over if we cannot open the file?
54     Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
55   }
56   return Buffer;
57 }
58 
59 unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
60   // Look up the filename in the string table, returning the pre-existing value
61   // if it exists.
62   llvm::StringMapEntry<unsigned> &Entry =
63     FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
64   if (Entry.getValue() != ~0U)
65     return Entry.getValue();
66 
67   // Otherwise, assign this the next available ID.
68   Entry.setValue(FilenamesByID.size());
69   FilenamesByID.push_back(&Entry);
70   return FilenamesByID.size()-1;
71 }
72 
73 /// AddLineNote - Add a line note to the line table that indicates that there
74 /// is a #line at the specified FID/Offset location which changes the presumed
75 /// location to LineNo/FilenameID.
76 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
77                                 unsigned LineNo, int FilenameID) {
78   std::vector<LineEntry> &Entries = LineEntries[FID];
79 
80   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
81          "Adding line entries out of order!");
82 
83   SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
84   unsigned IncludeOffset = 0;
85 
86   if (!Entries.empty()) {
87     // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
88     // that we are still in "foo.h".
89     if (FilenameID == -1)
90       FilenameID = Entries.back().FilenameID;
91 
92     // If we are after a line marker that switched us to system header mode, or
93     // that set #include information, preserve it.
94     Kind = Entries.back().FileKind;
95     IncludeOffset = Entries.back().IncludeOffset;
96   }
97 
98   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
99                                    IncludeOffset));
100 }
101 
102 /// AddLineNote This is the same as the previous version of AddLineNote, but is
103 /// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
104 /// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
105 /// this is a file exit.  FileKind specifies whether this is a system header or
106 /// extern C system header.
107 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
108                                 unsigned LineNo, int FilenameID,
109                                 unsigned EntryExit,
110                                 SrcMgr::CharacteristicKind FileKind) {
111   assert(FilenameID != -1 && "Unspecified filename should use other accessor");
112 
113   std::vector<LineEntry> &Entries = LineEntries[FID];
114 
115   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
116          "Adding line entries out of order!");
117 
118   unsigned IncludeOffset = 0;
119   if (EntryExit == 0) {  // No #include stack change.
120     IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
121   } else if (EntryExit == 1) {
122     IncludeOffset = Offset-1;
123   } else if (EntryExit == 2) {
124     assert(!Entries.empty() && Entries.back().IncludeOffset &&
125        "PPDirectives should have caught case when popping empty include stack");
126 
127     // Get the include loc of the last entries' include loc as our include loc.
128     IncludeOffset = 0;
129     if (const LineEntry *PrevEntry =
130           FindNearestLineEntry(FID, Entries.back().IncludeOffset))
131       IncludeOffset = PrevEntry->IncludeOffset;
132   }
133 
134   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
135                                    IncludeOffset));
136 }
137 
138 
139 /// FindNearestLineEntry - Find the line entry nearest to FID that is before
140 /// it.  If there is no line entry before Offset in FID, return null.
141 const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
142                                                      unsigned Offset) {
143   const std::vector<LineEntry> &Entries = LineEntries[FID];
144   assert(!Entries.empty() && "No #line entries for this FID after all!");
145 
146   // It is very common for the query to be after the last #line, check this
147   // first.
148   if (Entries.back().FileOffset <= Offset)
149     return &Entries.back();
150 
151   // Do a binary search to find the maximal element that is still before Offset.
152   std::vector<LineEntry>::const_iterator I =
153     std::upper_bound(Entries.begin(), Entries.end(), Offset);
154   if (I == Entries.begin()) return 0;
155   return &*--I;
156 }
157 
158 /// \brief Add a new line entry that has already been encoded into
159 /// the internal representation of the line table.
160 void LineTableInfo::AddEntry(unsigned FID,
161                              const std::vector<LineEntry> &Entries) {
162   LineEntries[FID] = Entries;
163 }
164 
165 /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
166 ///
167 unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
168   if (LineTable == 0)
169     LineTable = new LineTableInfo();
170   return LineTable->getLineTableFilenameID(Ptr, Len);
171 }
172 
173 
174 /// AddLineNote - Add a line note to the line table for the FileID and offset
175 /// specified by Loc.  If FilenameID is -1, it is considered to be
176 /// unspecified.
177 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
178                                 int FilenameID) {
179   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
180 
181   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
182 
183   // Remember that this file has #line directives now if it doesn't already.
184   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
185 
186   if (LineTable == 0)
187     LineTable = new LineTableInfo();
188   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
189 }
190 
191 /// AddLineNote - Add a GNU line marker to the line table.
192 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
193                                 int FilenameID, bool IsFileEntry,
194                                 bool IsFileExit, bool IsSystemHeader,
195                                 bool IsExternCHeader) {
196   // If there is no filename and no flags, this is treated just like a #line,
197   // which does not change the flags of the previous line marker.
198   if (FilenameID == -1) {
199     assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
200            "Can't set flags without setting the filename!");
201     return AddLineNote(Loc, LineNo, FilenameID);
202   }
203 
204   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
205   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
206 
207   // Remember that this file has #line directives now if it doesn't already.
208   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
209 
210   if (LineTable == 0)
211     LineTable = new LineTableInfo();
212 
213   SrcMgr::CharacteristicKind FileKind;
214   if (IsExternCHeader)
215     FileKind = SrcMgr::C_ExternCSystem;
216   else if (IsSystemHeader)
217     FileKind = SrcMgr::C_System;
218   else
219     FileKind = SrcMgr::C_User;
220 
221   unsigned EntryExit = 0;
222   if (IsFileEntry)
223     EntryExit = 1;
224   else if (IsFileExit)
225     EntryExit = 2;
226 
227   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
228                          EntryExit, FileKind);
229 }
230 
231 LineTableInfo &SourceManager::getLineTable() {
232   if (LineTable == 0)
233     LineTable = new LineTableInfo();
234   return *LineTable;
235 }
236 
237 //===----------------------------------------------------------------------===//
238 // Private 'Create' methods.
239 //===----------------------------------------------------------------------===//
240 
241 SourceManager::~SourceManager() {
242   delete LineTable;
243 
244   // Delete FileEntry objects corresponding to content caches.  Since the actual
245   // content cache objects are bump pointer allocated, we just have to run the
246   // dtors, but we call the deallocate method for completeness.
247   for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
248     MemBufferInfos[i]->~ContentCache();
249     ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
250   }
251   for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
252        I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
253     I->second->~ContentCache();
254     ContentCacheAlloc.Deallocate(I->second);
255   }
256 }
257 
258 void SourceManager::clearIDTables() {
259   MainFileID = FileID();
260   SLocEntryTable.clear();
261   LastLineNoFileIDQuery = FileID();
262   LastLineNoContentCache = 0;
263   LastFileIDLookup = FileID();
264 
265   if (LineTable)
266     LineTable->clear();
267 
268   // Use up FileID #0 as an invalid instantiation.
269   NextOffset = 0;
270   createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
271 }
272 
273 /// getOrCreateContentCache - Create or return a cached ContentCache for the
274 /// specified file.
275 const ContentCache *
276 SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
277   assert(FileEnt && "Didn't specify a file entry to use?");
278 
279   // Do we already have information about this file?
280   ContentCache *&Entry = FileInfos[FileEnt];
281   if (Entry) return Entry;
282 
283   // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
284   // so that FileInfo can use the low 3 bits of the pointer for its own
285   // nefarious purposes.
286   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
287   EntryAlign = std::max(8U, EntryAlign);
288   Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
289   new (Entry) ContentCache(FileEnt);
290   return Entry;
291 }
292 
293 
294 /// createMemBufferContentCache - Create a new ContentCache for the specified
295 ///  memory buffer.  This does no caching.
296 const ContentCache*
297 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
298   // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
299   // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
300   // the pointer for its own nefarious purposes.
301   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
302   EntryAlign = std::max(8U, EntryAlign);
303   ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
304   new (Entry) ContentCache();
305   MemBufferInfos.push_back(Entry);
306   Entry->setBuffer(Buffer);
307   return Entry;
308 }
309 
310 void SourceManager::PreallocateSLocEntries(ExternalSLocEntrySource *Source,
311                                            unsigned NumSLocEntries,
312                                            unsigned NextOffset) {
313   ExternalSLocEntries = Source;
314   this->NextOffset = NextOffset;
315   SLocEntryLoaded.resize(NumSLocEntries + 1);
316   SLocEntryLoaded[0] = true;
317   SLocEntryTable.resize(SLocEntryTable.size() + NumSLocEntries);
318 }
319 
320 void SourceManager::ClearPreallocatedSLocEntries() {
321   unsigned I = 0;
322   for (unsigned N = SLocEntryLoaded.size(); I != N; ++I)
323     if (!SLocEntryLoaded[I])
324       break;
325 
326   // We've already loaded all preallocated source location entries.
327   if (I == SLocEntryLoaded.size())
328     return;
329 
330   // Remove everything from location I onward.
331   SLocEntryTable.resize(I);
332   SLocEntryLoaded.clear();
333   ExternalSLocEntries = 0;
334 }
335 
336 
337 //===----------------------------------------------------------------------===//
338 // Methods to create new FileID's and instantiations.
339 //===----------------------------------------------------------------------===//
340 
341 /// createFileID - Create a new fileID for the specified ContentCache and
342 /// include position.  This works regardless of whether the ContentCache
343 /// corresponds to a file or some other input source.
344 FileID SourceManager::createFileID(const ContentCache *File,
345                                    SourceLocation IncludePos,
346                                    SrcMgr::CharacteristicKind FileCharacter,
347                                    unsigned PreallocatedID,
348                                    unsigned Offset) {
349   if (PreallocatedID) {
350     // If we're filling in a preallocated ID, just load in the file
351     // entry and return.
352     assert(PreallocatedID < SLocEntryLoaded.size() &&
353            "Preallocate ID out-of-range");
354     assert(!SLocEntryLoaded[PreallocatedID] &&
355            "Source location entry already loaded");
356     assert(Offset && "Preallocate source location cannot have zero offset");
357     SLocEntryTable[PreallocatedID]
358       = SLocEntry::get(Offset, FileInfo::get(IncludePos, File, FileCharacter));
359     SLocEntryLoaded[PreallocatedID] = true;
360     FileID FID = FileID::get(PreallocatedID);
361     if (File->FirstFID.isInvalid())
362       File->FirstFID = FID;
363     return LastFileIDLookup = FID;
364   }
365 
366   SLocEntryTable.push_back(SLocEntry::get(NextOffset,
367                                           FileInfo::get(IncludePos, File,
368                                                         FileCharacter)));
369   unsigned FileSize = File->getSize();
370   assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
371   NextOffset += FileSize+1;
372 
373   // Set LastFileIDLookup to the newly created file.  The next getFileID call is
374   // almost guaranteed to be from that file.
375   FileID FID = FileID::get(SLocEntryTable.size()-1);
376   if (File->FirstFID.isInvalid())
377     File->FirstFID = FID;
378   return LastFileIDLookup = FID;
379 }
380 
381 /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
382 /// that a token from SpellingLoc should actually be referenced from
383 /// InstantiationLoc.
384 SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
385                                                      SourceLocation ILocStart,
386                                                      SourceLocation ILocEnd,
387                                                      unsigned TokLength,
388                                                      unsigned PreallocatedID,
389                                                      unsigned Offset) {
390   InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
391   if (PreallocatedID) {
392     // If we're filling in a preallocated ID, just load in the
393     // instantiation entry and return.
394     assert(PreallocatedID < SLocEntryLoaded.size() &&
395            "Preallocate ID out-of-range");
396     assert(!SLocEntryLoaded[PreallocatedID] &&
397            "Source location entry already loaded");
398     assert(Offset && "Preallocate source location cannot have zero offset");
399     SLocEntryTable[PreallocatedID] = SLocEntry::get(Offset, II);
400     SLocEntryLoaded[PreallocatedID] = true;
401     return SourceLocation::getMacroLoc(Offset);
402   }
403   SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
404   assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
405   NextOffset += TokLength+1;
406   return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
407 }
408 
409 /// getBufferData - Return a pointer to the start and end of the source buffer
410 /// data for the specified FileID.
411 std::pair<const char*, const char*>
412 SourceManager::getBufferData(FileID FID) const {
413   const llvm::MemoryBuffer *Buf = getBuffer(FID);
414   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
415 }
416 
417 
418 //===----------------------------------------------------------------------===//
419 // SourceLocation manipulation methods.
420 //===----------------------------------------------------------------------===//
421 
422 /// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
423 /// method that is used for all SourceManager queries that start with a
424 /// SourceLocation object.  It is responsible for finding the entry in
425 /// SLocEntryTable which contains the specified location.
426 ///
427 FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
428   assert(SLocOffset && "Invalid FileID");
429 
430   // After the first and second level caches, I see two common sorts of
431   // behavior: 1) a lot of searched FileID's are "near" the cached file location
432   // or are "near" the cached instantiation location.  2) others are just
433   // completely random and may be a very long way away.
434   //
435   // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
436   // then we fall back to a less cache efficient, but more scalable, binary
437   // search to find the location.
438 
439   // See if this is near the file point - worst case we start scanning from the
440   // most newly created FileID.
441   std::vector<SrcMgr::SLocEntry>::const_iterator I;
442 
443   if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
444     // Neither loc prunes our search.
445     I = SLocEntryTable.end();
446   } else {
447     // Perhaps it is near the file point.
448     I = SLocEntryTable.begin()+LastFileIDLookup.ID;
449   }
450 
451   // Find the FileID that contains this.  "I" is an iterator that points to a
452   // FileID whose offset is known to be larger than SLocOffset.
453   unsigned NumProbes = 0;
454   while (1) {
455     --I;
456     if (ExternalSLocEntries)
457       getSLocEntry(FileID::get(I - SLocEntryTable.begin()));
458     if (I->getOffset() <= SLocOffset) {
459 #if 0
460       printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
461              I-SLocEntryTable.begin(),
462              I->isInstantiation() ? "inst" : "file",
463              LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
464 #endif
465       FileID Res = FileID::get(I-SLocEntryTable.begin());
466 
467       // If this isn't an instantiation, remember it.  We have good locality
468       // across FileID lookups.
469       if (!I->isInstantiation())
470         LastFileIDLookup = Res;
471       NumLinearScans += NumProbes+1;
472       return Res;
473     }
474     if (++NumProbes == 8)
475       break;
476   }
477 
478   // Convert "I" back into an index.  We know that it is an entry whose index is
479   // larger than the offset we are looking for.
480   unsigned GreaterIndex = I-SLocEntryTable.begin();
481   // LessIndex - This is the lower bound of the range that we're searching.
482   // We know that the offset corresponding to the FileID is is less than
483   // SLocOffset.
484   unsigned LessIndex = 0;
485   NumProbes = 0;
486   while (1) {
487     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
488     unsigned MidOffset = getSLocEntry(FileID::get(MiddleIndex)).getOffset();
489 
490     ++NumProbes;
491 
492     // If the offset of the midpoint is too large, chop the high side of the
493     // range to the midpoint.
494     if (MidOffset > SLocOffset) {
495       GreaterIndex = MiddleIndex;
496       continue;
497     }
498 
499     // If the middle index contains the value, succeed and return.
500     if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
501 #if 0
502       printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
503              I-SLocEntryTable.begin(),
504              I->isInstantiation() ? "inst" : "file",
505              LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
506 #endif
507       FileID Res = FileID::get(MiddleIndex);
508 
509       // If this isn't an instantiation, remember it.  We have good locality
510       // across FileID lookups.
511       if (!I->isInstantiation())
512         LastFileIDLookup = Res;
513       NumBinaryProbes += NumProbes;
514       return Res;
515     }
516 
517     // Otherwise, move the low-side up to the middle index.
518     LessIndex = MiddleIndex;
519   }
520 }
521 
522 SourceLocation SourceManager::
523 getInstantiationLocSlowCase(SourceLocation Loc) const {
524   do {
525     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
526     Loc = getSLocEntry(LocInfo.first).getInstantiation()
527                    .getInstantiationLocStart();
528     Loc = Loc.getFileLocWithOffset(LocInfo.second);
529   } while (!Loc.isFileID());
530 
531   return Loc;
532 }
533 
534 SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
535   do {
536     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
537     Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
538     Loc = Loc.getFileLocWithOffset(LocInfo.second);
539   } while (!Loc.isFileID());
540   return Loc;
541 }
542 
543 
544 std::pair<FileID, unsigned>
545 SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
546                                                      unsigned Offset) const {
547   // If this is an instantiation record, walk through all the instantiation
548   // points.
549   FileID FID;
550   SourceLocation Loc;
551   do {
552     Loc = E->getInstantiation().getInstantiationLocStart();
553 
554     FID = getFileID(Loc);
555     E = &getSLocEntry(FID);
556     Offset += Loc.getOffset()-E->getOffset();
557   } while (!Loc.isFileID());
558 
559   return std::make_pair(FID, Offset);
560 }
561 
562 std::pair<FileID, unsigned>
563 SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
564                                                 unsigned Offset) const {
565   // If this is an instantiation record, walk through all the instantiation
566   // points.
567   FileID FID;
568   SourceLocation Loc;
569   do {
570     Loc = E->getInstantiation().getSpellingLoc();
571 
572     FID = getFileID(Loc);
573     E = &getSLocEntry(FID);
574     Offset += Loc.getOffset()-E->getOffset();
575   } while (!Loc.isFileID());
576 
577   return std::make_pair(FID, Offset);
578 }
579 
580 /// getImmediateSpellingLoc - Given a SourceLocation object, return the
581 /// spelling location referenced by the ID.  This is the first level down
582 /// towards the place where the characters that make up the lexed token can be
583 /// found.  This should not generally be used by clients.
584 SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
585   if (Loc.isFileID()) return Loc;
586   std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
587   Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
588   return Loc.getFileLocWithOffset(LocInfo.second);
589 }
590 
591 
592 /// getImmediateInstantiationRange - Loc is required to be an instantiation
593 /// location.  Return the start/end of the instantiation information.
594 std::pair<SourceLocation,SourceLocation>
595 SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
596   assert(Loc.isMacroID() && "Not an instantiation loc!");
597   const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
598   return II.getInstantiationLocRange();
599 }
600 
601 /// getInstantiationRange - Given a SourceLocation object, return the
602 /// range of tokens covered by the instantiation in the ultimate file.
603 std::pair<SourceLocation,SourceLocation>
604 SourceManager::getInstantiationRange(SourceLocation Loc) const {
605   if (Loc.isFileID()) return std::make_pair(Loc, Loc);
606 
607   std::pair<SourceLocation,SourceLocation> Res =
608     getImmediateInstantiationRange(Loc);
609 
610   // Fully resolve the start and end locations to their ultimate instantiation
611   // points.
612   while (!Res.first.isFileID())
613     Res.first = getImmediateInstantiationRange(Res.first).first;
614   while (!Res.second.isFileID())
615     Res.second = getImmediateInstantiationRange(Res.second).second;
616   return Res;
617 }
618 
619 
620 
621 //===----------------------------------------------------------------------===//
622 // Queries about the code at a SourceLocation.
623 //===----------------------------------------------------------------------===//
624 
625 /// getCharacterData - Return a pointer to the start of the specified location
626 /// in the appropriate MemoryBuffer.
627 const char *SourceManager::getCharacterData(SourceLocation SL) const {
628   // Note that this is a hot function in the getSpelling() path, which is
629   // heavily used by -E mode.
630   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
631 
632   // Note that calling 'getBuffer()' may lazily page in a source file.
633   return getSLocEntry(LocInfo.first).getFile().getContentCache()
634               ->getBuffer()->getBufferStart() + LocInfo.second;
635 }
636 
637 
638 /// getColumnNumber - Return the column # for the specified file position.
639 /// this is significantly cheaper to compute than the line number.
640 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
641   const char *Buf = getBuffer(FID)->getBufferStart();
642 
643   unsigned LineStart = FilePos;
644   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
645     --LineStart;
646   return FilePos-LineStart+1;
647 }
648 
649 unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
650   if (Loc.isInvalid()) return 0;
651   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
652   return getColumnNumber(LocInfo.first, LocInfo.second);
653 }
654 
655 unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
656   if (Loc.isInvalid()) return 0;
657   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
658   return getColumnNumber(LocInfo.first, LocInfo.second);
659 }
660 
661 
662 
663 static void ComputeLineNumbers(ContentCache* FI,
664                                llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
665 static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
666   // Note that calling 'getBuffer()' may lazily page in the file.
667   const MemoryBuffer *Buffer = FI->getBuffer();
668 
669   // Find the file offsets of all of the *physical* source lines.  This does
670   // not look at trigraphs, escaped newlines, or anything else tricky.
671   std::vector<unsigned> LineOffsets;
672 
673   // Line #1 starts at char 0.
674   LineOffsets.push_back(0);
675 
676   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
677   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
678   unsigned Offs = 0;
679   while (1) {
680     // Skip over the contents of the line.
681     // TODO: Vectorize this?  This is very performance sensitive for programs
682     // with lots of diagnostics and in -E mode.
683     const unsigned char *NextBuf = (const unsigned char *)Buf;
684     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
685       ++NextBuf;
686     Offs += NextBuf-Buf;
687     Buf = NextBuf;
688 
689     if (Buf[0] == '\n' || Buf[0] == '\r') {
690       // If this is \n\r or \r\n, skip both characters.
691       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
692         ++Offs, ++Buf;
693       ++Offs, ++Buf;
694       LineOffsets.push_back(Offs);
695     } else {
696       // Otherwise, this is a null.  If end of file, exit.
697       if (Buf == End) break;
698       // Otherwise, skip the null.
699       ++Offs, ++Buf;
700     }
701   }
702 
703   // Copy the offsets into the FileInfo structure.
704   FI->NumLines = LineOffsets.size();
705   FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
706   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
707 }
708 
709 /// getLineNumber - Given a SourceLocation, return the spelling line number
710 /// for the position indicated.  This requires building and caching a table of
711 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
712 /// about to emit a diagnostic.
713 unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
714   ContentCache *Content;
715   if (LastLineNoFileIDQuery == FID)
716     Content = LastLineNoContentCache;
717   else
718     Content = const_cast<ContentCache*>(getSLocEntry(FID)
719                                         .getFile().getContentCache());
720 
721   // If this is the first use of line information for this buffer, compute the
722   /// SourceLineCache for it on demand.
723   if (Content->SourceLineCache == 0)
724     ComputeLineNumbers(Content, ContentCacheAlloc);
725 
726   // Okay, we know we have a line number table.  Do a binary search to find the
727   // line number that this character position lands on.
728   unsigned *SourceLineCache = Content->SourceLineCache;
729   unsigned *SourceLineCacheStart = SourceLineCache;
730   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
731 
732   unsigned QueriedFilePos = FilePos+1;
733 
734   // FIXME: I would like to be convinced that this code is worth being as
735   // complicated as it is, binary search isn't that slow.
736   //
737   // If it is worth being optimized, then in my opinion it could be more
738   // performant, simpler, and more obviously correct by just "galloping" outward
739   // from the queried file position. In fact, this could be incorporated into a
740   // generic algorithm such as lower_bound_with_hint.
741   //
742   // If someone gives me a test case where this matters, and I will do it! - DWD
743 
744   // If the previous query was to the same file, we know both the file pos from
745   // that query and the line number returned.  This allows us to narrow the
746   // search space from the entire file to something near the match.
747   if (LastLineNoFileIDQuery == FID) {
748     if (QueriedFilePos >= LastLineNoFilePos) {
749       // FIXME: Potential overflow?
750       SourceLineCache = SourceLineCache+LastLineNoResult-1;
751 
752       // The query is likely to be nearby the previous one.  Here we check to
753       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
754       // where big comment blocks and vertical whitespace eat up lines but
755       // contribute no tokens.
756       if (SourceLineCache+5 < SourceLineCacheEnd) {
757         if (SourceLineCache[5] > QueriedFilePos)
758           SourceLineCacheEnd = SourceLineCache+5;
759         else if (SourceLineCache+10 < SourceLineCacheEnd) {
760           if (SourceLineCache[10] > QueriedFilePos)
761             SourceLineCacheEnd = SourceLineCache+10;
762           else if (SourceLineCache+20 < SourceLineCacheEnd) {
763             if (SourceLineCache[20] > QueriedFilePos)
764               SourceLineCacheEnd = SourceLineCache+20;
765           }
766         }
767       }
768     } else {
769       if (LastLineNoResult < Content->NumLines)
770         SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
771     }
772   }
773 
774   // If the spread is large, do a "radix" test as our initial guess, based on
775   // the assumption that lines average to approximately the same length.
776   // NOTE: This is currently disabled, as it does not appear to be profitable in
777   // initial measurements.
778   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
779     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
780 
781     // Take a stab at guessing where it is.
782     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
783 
784     // Check for -10 and +10 lines.
785     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
786     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
787 
788     // If the computed lower bound is less than the query location, move it in.
789     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
790         SourceLineCacheStart[LowerBound] < QueriedFilePos)
791       SourceLineCache = SourceLineCacheStart+LowerBound;
792 
793     // If the computed upper bound is greater than the query location, move it.
794     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
795         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
796       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
797   }
798 
799   unsigned *Pos
800     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
801   unsigned LineNo = Pos-SourceLineCacheStart;
802 
803   LastLineNoFileIDQuery = FID;
804   LastLineNoContentCache = Content;
805   LastLineNoFilePos = QueriedFilePos;
806   LastLineNoResult = LineNo;
807   return LineNo;
808 }
809 
810 unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
811   if (Loc.isInvalid()) return 0;
812   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
813   return getLineNumber(LocInfo.first, LocInfo.second);
814 }
815 unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
816   if (Loc.isInvalid()) return 0;
817   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
818   return getLineNumber(LocInfo.first, LocInfo.second);
819 }
820 
821 /// getFileCharacteristic - return the file characteristic of the specified
822 /// source location, indicating whether this is a normal file, a system
823 /// header, or an "implicit extern C" system header.
824 ///
825 /// This state can be modified with flags on GNU linemarker directives like:
826 ///   # 4 "foo.h" 3
827 /// which changes all source locations in the current file after that to be
828 /// considered to be from a system header.
829 SrcMgr::CharacteristicKind
830 SourceManager::getFileCharacteristic(SourceLocation Loc) const {
831   assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
832   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
833   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
834 
835   // If there are no #line directives in this file, just return the whole-file
836   // state.
837   if (!FI.hasLineDirectives())
838     return FI.getFileCharacteristic();
839 
840   assert(LineTable && "Can't have linetable entries without a LineTable!");
841   // See if there is a #line directive before the location.
842   const LineEntry *Entry =
843     LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
844 
845   // If this is before the first line marker, use the file characteristic.
846   if (!Entry)
847     return FI.getFileCharacteristic();
848 
849   return Entry->FileKind;
850 }
851 
852 /// Return the filename or buffer identifier of the buffer the location is in.
853 /// Note that this name does not respect #line directives.  Use getPresumedLoc
854 /// for normal clients.
855 const char *SourceManager::getBufferName(SourceLocation Loc) const {
856   if (Loc.isInvalid()) return "<invalid loc>";
857 
858   return getBuffer(getFileID(Loc))->getBufferIdentifier();
859 }
860 
861 
862 /// getPresumedLoc - This method returns the "presumed" location of a
863 /// SourceLocation specifies.  A "presumed location" can be modified by #line
864 /// or GNU line marker directives.  This provides a view on the data that a
865 /// user should see in diagnostics, for example.
866 ///
867 /// Note that a presumed location is always given as the instantiation point
868 /// of an instantiation location, not at the spelling location.
869 PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
870   if (Loc.isInvalid()) return PresumedLoc();
871 
872   // Presumed locations are always for instantiation points.
873   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
874 
875   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
876   const SrcMgr::ContentCache *C = FI.getContentCache();
877 
878   // To get the source name, first consult the FileEntry (if one exists)
879   // before the MemBuffer as this will avoid unnecessarily paging in the
880   // MemBuffer.
881   const char *Filename =
882     C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
883   unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
884   unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
885   SourceLocation IncludeLoc = FI.getIncludeLoc();
886 
887   // If we have #line directives in this file, update and overwrite the physical
888   // location info if appropriate.
889   if (FI.hasLineDirectives()) {
890     assert(LineTable && "Can't have linetable entries without a LineTable!");
891     // See if there is a #line directive before this.  If so, get it.
892     if (const LineEntry *Entry =
893           LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
894       // If the LineEntry indicates a filename, use it.
895       if (Entry->FilenameID != -1)
896         Filename = LineTable->getFilename(Entry->FilenameID);
897 
898       // Use the line number specified by the LineEntry.  This line number may
899       // be multiple lines down from the line entry.  Add the difference in
900       // physical line numbers from the query point and the line marker to the
901       // total.
902       unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
903       LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
904 
905       // Note that column numbers are not molested by line markers.
906 
907       // Handle virtual #include manipulation.
908       if (Entry->IncludeOffset) {
909         IncludeLoc = getLocForStartOfFile(LocInfo.first);
910         IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
911       }
912     }
913   }
914 
915   return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
916 }
917 
918 //===----------------------------------------------------------------------===//
919 // Other miscellaneous methods.
920 //===----------------------------------------------------------------------===//
921 
922 /// \brief Get the source location for the given file:line:col triplet.
923 ///
924 /// If the source file is included multiple times, the source location will
925 /// be based upon the first inclusion.
926 SourceLocation SourceManager::getLocation(const FileEntry *SourceFile,
927                                           unsigned Line, unsigned Col) const {
928   assert(SourceFile && "Null source file!");
929   assert(Line && Col && "Line and column should start from 1!");
930 
931   fileinfo_iterator FI = FileInfos.find(SourceFile);
932   if (FI == FileInfos.end())
933     return SourceLocation();
934   ContentCache *Content = FI->second;
935 
936   // If this is the first use of line information for this buffer, compute the
937   /// SourceLineCache for it on demand.
938   if (Content->SourceLineCache == 0)
939     ComputeLineNumbers(Content, ContentCacheAlloc);
940 
941   if (Line > Content->NumLines)
942     return SourceLocation();
943 
944   unsigned FilePos = Content->SourceLineCache[Line - 1];
945   const char *Buf = Content->getBuffer()->getBufferStart() + FilePos;
946   unsigned BufLength = Content->getBuffer()->getBufferEnd() - Buf;
947   unsigned i = 0;
948 
949   // Check that the given column is valid.
950   while (i < BufLength-1 && i < Col-1 && Buf[i] != '\n' && Buf[i] != '\r')
951     ++i;
952   if (i < Col-1)
953     return SourceLocation();
954 
955   return getLocForStartOfFile(Content->FirstFID).
956             getFileLocWithOffset(FilePos + Col - 1);
957 }
958 
959 /// \brief Determines the order of 2 source locations in the translation unit.
960 ///
961 /// \returns true if LHS source location comes before RHS, false otherwise.
962 bool SourceManager::isBeforeInTranslationUnit(SourceLocation LHS,
963                                               SourceLocation RHS) const {
964   assert(LHS.isValid() && RHS.isValid() && "Passed invalid source location!");
965   if (LHS == RHS)
966     return false;
967 
968   std::pair<FileID, unsigned> LOffs = getDecomposedLoc(LHS);
969   std::pair<FileID, unsigned> ROffs = getDecomposedLoc(RHS);
970 
971   // If the source locations are in the same file, just compare offsets.
972   if (LOffs.first == ROffs.first)
973     return LOffs.second < ROffs.second;
974 
975   // If we are comparing a source location with multiple locations in the same
976   // file, we get a big win by caching the result.
977 
978   if (LastLFIDForBeforeTUCheck == LOffs.first &&
979       LastRFIDForBeforeTUCheck == ROffs.first)
980     return LastResForBeforeTUCheck;
981 
982   LastLFIDForBeforeTUCheck = LOffs.first;
983   LastRFIDForBeforeTUCheck = ROffs.first;
984 
985   // "Traverse" the include/instantiation stacks of both locations and try to
986   // find a common "ancestor".
987   //
988   // First we traverse the stack of the right location and check each level
989   // against the level of the left location, while collecting all levels in a
990   // "stack map".
991 
992   std::map<FileID, unsigned> ROffsMap;
993   ROffsMap[ROffs.first] = ROffs.second;
994 
995   while (1) {
996     SourceLocation UpperLoc;
997     const SrcMgr::SLocEntry &Entry = getSLocEntry(ROffs.first);
998     if (Entry.isInstantiation())
999       UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1000     else
1001       UpperLoc = Entry.getFile().getIncludeLoc();
1002 
1003     if (UpperLoc.isInvalid())
1004       break; // We reached the top.
1005 
1006     ROffs = getDecomposedLoc(UpperLoc);
1007 
1008     if (LOffs.first == ROffs.first)
1009       return LastResForBeforeTUCheck = LOffs.second < ROffs.second;
1010 
1011     ROffsMap[ROffs.first] = ROffs.second;
1012   }
1013 
1014   // We didn't find a common ancestor. Now traverse the stack of the left
1015   // location, checking against the stack map of the right location.
1016 
1017   while (1) {
1018     SourceLocation UpperLoc;
1019     const SrcMgr::SLocEntry &Entry = getSLocEntry(LOffs.first);
1020     if (Entry.isInstantiation())
1021       UpperLoc = Entry.getInstantiation().getInstantiationLocStart();
1022     else
1023       UpperLoc = Entry.getFile().getIncludeLoc();
1024 
1025     if (UpperLoc.isInvalid())
1026       break; // We reached the top.
1027 
1028     LOffs = getDecomposedLoc(UpperLoc);
1029 
1030     std::map<FileID, unsigned>::iterator I = ROffsMap.find(LOffs.first);
1031     if (I != ROffsMap.end())
1032       return LastResForBeforeTUCheck = LOffs.second < I->second;
1033   }
1034 
1035   // No common ancestor.
1036   // Now we are getting into murky waters. Most probably this is because one
1037   // location is in the predefines buffer.
1038 
1039   const FileEntry *LEntry =
1040     getSLocEntry(LOffs.first).getFile().getContentCache()->Entry;
1041   const FileEntry *REntry =
1042     getSLocEntry(ROffs.first).getFile().getContentCache()->Entry;
1043 
1044   // If the locations are in two memory buffers we give up, we can't answer
1045   // which one should be considered first.
1046   // FIXME: Should there be a way to "include" memory buffers in the translation
1047   // unit ?
1048   assert((LEntry != 0 || REntry != 0) && "Locations in memory buffers.");
1049   (void) REntry;
1050 
1051   // Consider the memory buffer as coming before the file in the translation
1052   // unit.
1053   if (LEntry == 0)
1054     return LastResForBeforeTUCheck = true;
1055   else {
1056     assert(REntry == 0 && "Locations in not #included files ?");
1057     return LastResForBeforeTUCheck = false;
1058   }
1059 }
1060 
1061 /// PrintStats - Print statistics to stderr.
1062 ///
1063 void SourceManager::PrintStats() const {
1064   llvm::errs() << "\n*** Source Manager Stats:\n";
1065   llvm::errs() << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
1066                << " mem buffers mapped.\n";
1067   llvm::errs() << SLocEntryTable.size() << " SLocEntry's allocated, "
1068                << NextOffset << "B of Sloc address space used.\n";
1069 
1070   unsigned NumLineNumsComputed = 0;
1071   unsigned NumFileBytesMapped = 0;
1072   for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
1073     NumLineNumsComputed += I->second->SourceLineCache != 0;
1074     NumFileBytesMapped  += I->second->getSizeBytesMapped();
1075   }
1076 
1077   llvm::errs() << NumFileBytesMapped << " bytes of files mapped, "
1078                << NumLineNumsComputed << " files with line #'s computed.\n";
1079   llvm::errs() << "FileID scans: " << NumLinearScans << " linear, "
1080                << NumBinaryProbes << " binary.\n";
1081 }
1082 
1083 ExternalSLocEntrySource::~ExternalSLocEntrySource() { }
1084