xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision a07ebc5b9538ed2af600da976eebce405ee16bf5)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/SourceManagerInternals.h"
16 #include "clang/Basic/FileManager.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/System/Path.h"
20 #include "llvm/Bitcode/Serialize.h"
21 #include "llvm/Bitcode/Deserialize.h"
22 #include "llvm/Support/Streams.h"
23 #include <algorithm>
24 using namespace clang;
25 using namespace SrcMgr;
26 using llvm::MemoryBuffer;
27 
28 //===----------------------------------------------------------------------===//
29 // SourceManager Helper Classes
30 //===----------------------------------------------------------------------===//
31 
32 ContentCache::~ContentCache() {
33   delete Buffer;
34 }
35 
36 /// getSizeBytesMapped - Returns the number of bytes actually mapped for
37 ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
38 ///  instantiated.
39 unsigned ContentCache::getSizeBytesMapped() const {
40   return Buffer ? Buffer->getBufferSize() : 0;
41 }
42 
43 /// getSize - Returns the size of the content encapsulated by this ContentCache.
44 ///  This can be the size of the source file or the size of an arbitrary
45 ///  scratch buffer.  If the ContentCache encapsulates a source file, that
46 ///  file is not lazily brought in from disk to satisfy this query.
47 unsigned ContentCache::getSize() const {
48   return Entry ? Entry->getSize() : Buffer->getBufferSize();
49 }
50 
51 const llvm::MemoryBuffer *ContentCache::getBuffer() const {
52   // Lazily create the Buffer for ContentCaches that wrap files.
53   if (!Buffer && Entry) {
54     // FIXME: Should we support a way to not have to do this check over
55     //   and over if we cannot open the file?
56     Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
57   }
58   return Buffer;
59 }
60 
61 unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
62   // Look up the filename in the string table, returning the pre-existing value
63   // if it exists.
64   llvm::StringMapEntry<unsigned> &Entry =
65     FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
66   if (Entry.getValue() != ~0U)
67     return Entry.getValue();
68 
69   // Otherwise, assign this the next available ID.
70   Entry.setValue(FilenamesByID.size());
71   FilenamesByID.push_back(&Entry);
72   return FilenamesByID.size()-1;
73 }
74 
75 /// AddLineNote - Add a line note to the line table that indicates that there
76 /// is a #line at the specified FID/Offset location which changes the presumed
77 /// location to LineNo/FilenameID.
78 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
79                                 unsigned LineNo, int FilenameID) {
80   std::vector<LineEntry> &Entries = LineEntries[FID];
81 
82   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
83          "Adding line entries out of order!");
84 
85   SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
86   unsigned IncludeOffset = 0;
87 
88   if (!Entries.empty()) {
89     // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
90     // that we are still in "foo.h".
91     if (FilenameID == -1)
92       FilenameID = Entries.back().FilenameID;
93 
94     // If we are after a line marker that switched us to system header mode, or
95     // that set #include information, preserve it.
96     Kind = Entries.back().FileKind;
97     IncludeOffset = Entries.back().IncludeOffset;
98   }
99 
100   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
101                                    IncludeOffset));
102 }
103 
104 /// AddLineNote This is the same as the previous version of AddLineNote, but is
105 /// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
106 /// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
107 /// this is a file exit.  FileKind specifies whether this is a system header or
108 /// extern C system header.
109 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
110                                 unsigned LineNo, int FilenameID,
111                                 unsigned EntryExit,
112                                 SrcMgr::CharacteristicKind FileKind) {
113   assert(FilenameID != -1 && "Unspecified filename should use other accessor");
114 
115   std::vector<LineEntry> &Entries = LineEntries[FID];
116 
117   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
118          "Adding line entries out of order!");
119 
120   unsigned IncludeOffset = 0;
121   if (EntryExit == 0) {  // No #include stack change.
122     IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
123   } else if (EntryExit == 1) {
124     IncludeOffset = Offset-1;
125   } else if (EntryExit == 2) {
126     assert(!Entries.empty() && Entries.back().IncludeOffset &&
127        "PPDirectives should have caught case when popping empty include stack");
128 
129     // Get the include loc of the last entries' include loc as our include loc.
130     IncludeOffset = 0;
131     if (const LineEntry *PrevEntry =
132           FindNearestLineEntry(FID, Entries.back().IncludeOffset))
133       IncludeOffset = PrevEntry->IncludeOffset;
134   }
135 
136   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
137                                    IncludeOffset));
138 }
139 
140 
141 /// FindNearestLineEntry - Find the line entry nearest to FID that is before
142 /// it.  If there is no line entry before Offset in FID, return null.
143 const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
144                                                      unsigned Offset) {
145   const std::vector<LineEntry> &Entries = LineEntries[FID];
146   assert(!Entries.empty() && "No #line entries for this FID after all!");
147 
148   // It is very common for the query to be after the last #line, check this
149   // first.
150   if (Entries.back().FileOffset <= Offset)
151     return &Entries.back();
152 
153   // Do a binary search to find the maximal element that is still before Offset.
154   std::vector<LineEntry>::const_iterator I =
155     std::upper_bound(Entries.begin(), Entries.end(), Offset);
156   if (I == Entries.begin()) return 0;
157   return &*--I;
158 }
159 
160 
161 /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
162 ///
163 unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
164   if (LineTable == 0)
165     LineTable = new LineTableInfo();
166   return LineTable->getLineTableFilenameID(Ptr, Len);
167 }
168 
169 
170 /// AddLineNote - Add a line note to the line table for the FileID and offset
171 /// specified by Loc.  If FilenameID is -1, it is considered to be
172 /// unspecified.
173 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
174                                 int FilenameID) {
175   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
176 
177   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
178 
179   // Remember that this file has #line directives now if it doesn't already.
180   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
181 
182   if (LineTable == 0)
183     LineTable = new LineTableInfo();
184   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
185 }
186 
187 /// AddLineNote - Add a GNU line marker to the line table.
188 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
189                                 int FilenameID, bool IsFileEntry,
190                                 bool IsFileExit, bool IsSystemHeader,
191                                 bool IsExternCHeader) {
192   // If there is no filename and no flags, this is treated just like a #line,
193   // which does not change the flags of the previous line marker.
194   if (FilenameID == -1) {
195     assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
196            "Can't set flags without setting the filename!");
197     return AddLineNote(Loc, LineNo, FilenameID);
198   }
199 
200   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
201   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
202 
203   // Remember that this file has #line directives now if it doesn't already.
204   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
205 
206   if (LineTable == 0)
207     LineTable = new LineTableInfo();
208 
209   SrcMgr::CharacteristicKind FileKind;
210   if (IsExternCHeader)
211     FileKind = SrcMgr::C_ExternCSystem;
212   else if (IsSystemHeader)
213     FileKind = SrcMgr::C_System;
214   else
215     FileKind = SrcMgr::C_User;
216 
217   unsigned EntryExit = 0;
218   if (IsFileEntry)
219     EntryExit = 1;
220   else if (IsFileExit)
221     EntryExit = 2;
222 
223   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
224                          EntryExit, FileKind);
225 }
226 
227 
228 //===----------------------------------------------------------------------===//
229 // Private 'Create' methods.
230 //===----------------------------------------------------------------------===//
231 
232 SourceManager::~SourceManager() {
233   delete LineTable;
234 
235   // Delete FileEntry objects corresponding to content caches.  Since the actual
236   // content cache objects are bump pointer allocated, we just have to run the
237   // dtors, but we call the deallocate method for completeness.
238   for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
239     MemBufferInfos[i]->~ContentCache();
240     ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
241   }
242   for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
243        I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
244     I->second->~ContentCache();
245     ContentCacheAlloc.Deallocate(I->second);
246   }
247 }
248 
249 void SourceManager::clearIDTables() {
250   MainFileID = FileID();
251   SLocEntryTable.clear();
252   LastLineNoFileIDQuery = FileID();
253   LastLineNoContentCache = 0;
254   LastFileIDLookup = FileID();
255 
256   if (LineTable)
257     LineTable->clear();
258 
259   // Use up FileID #0 as an invalid instantiation.
260   NextOffset = 0;
261   createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
262 }
263 
264 /// getOrCreateContentCache - Create or return a cached ContentCache for the
265 /// specified file.
266 const ContentCache *
267 SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
268   assert(FileEnt && "Didn't specify a file entry to use?");
269 
270   // Do we already have information about this file?
271   ContentCache *&Entry = FileInfos[FileEnt];
272   if (Entry) return Entry;
273 
274   // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
275   // so that FileInfo can use the low 3 bits of the pointer for its own
276   // nefarious purposes.
277   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
278   EntryAlign = std::max(8U, EntryAlign);
279   Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
280   new (Entry) ContentCache(FileEnt);
281   return Entry;
282 }
283 
284 
285 /// createMemBufferContentCache - Create a new ContentCache for the specified
286 ///  memory buffer.  This does no caching.
287 const ContentCache*
288 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
289   // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
290   // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
291   // the pointer for its own nefarious purposes.
292   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
293   EntryAlign = std::max(8U, EntryAlign);
294   ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
295   new (Entry) ContentCache();
296   MemBufferInfos.push_back(Entry);
297   Entry->setBuffer(Buffer);
298   return Entry;
299 }
300 
301 //===----------------------------------------------------------------------===//
302 // Methods to create new FileID's and instantiations.
303 //===----------------------------------------------------------------------===//
304 
305 /// createFileID - Create a new fileID for the specified ContentCache and
306 /// include position.  This works regardless of whether the ContentCache
307 /// corresponds to a file or some other input source.
308 FileID SourceManager::createFileID(const ContentCache *File,
309                                    SourceLocation IncludePos,
310                                    SrcMgr::CharacteristicKind FileCharacter) {
311   SLocEntryTable.push_back(SLocEntry::get(NextOffset,
312                                           FileInfo::get(IncludePos, File,
313                                                         FileCharacter)));
314   unsigned FileSize = File->getSize();
315   assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
316   NextOffset += FileSize+1;
317 
318   // Set LastFileIDLookup to the newly created file.  The next getFileID call is
319   // almost guaranteed to be from that file.
320   return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
321 }
322 
323 /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
324 /// that a token from SpellingLoc should actually be referenced from
325 /// InstantiationLoc.
326 SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
327                                                      SourceLocation ILocStart,
328                                                      SourceLocation ILocEnd,
329                                                      unsigned TokLength) {
330   InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
331   SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
332   assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
333   NextOffset += TokLength+1;
334   return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
335 }
336 
337 /// getBufferData - Return a pointer to the start and end of the source buffer
338 /// data for the specified FileID.
339 std::pair<const char*, const char*>
340 SourceManager::getBufferData(FileID FID) const {
341   const llvm::MemoryBuffer *Buf = getBuffer(FID);
342   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
343 }
344 
345 
346 //===----------------------------------------------------------------------===//
347 // SourceLocation manipulation methods.
348 //===----------------------------------------------------------------------===//
349 
350 /// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
351 /// method that is used for all SourceManager queries that start with a
352 /// SourceLocation object.  It is responsible for finding the entry in
353 /// SLocEntryTable which contains the specified location.
354 ///
355 FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
356   assert(SLocOffset && "Invalid FileID");
357 
358   // After the first and second level caches, I see two common sorts of
359   // behavior: 1) a lot of searched FileID's are "near" the cached file location
360   // or are "near" the cached instantiation location.  2) others are just
361   // completely random and may be a very long way away.
362   //
363   // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
364   // then we fall back to a less cache efficient, but more scalable, binary
365   // search to find the location.
366 
367   // See if this is near the file point - worst case we start scanning from the
368   // most newly created FileID.
369   std::vector<SrcMgr::SLocEntry>::const_iterator I;
370 
371   if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
372     // Neither loc prunes our search.
373     I = SLocEntryTable.end();
374   } else {
375     // Perhaps it is near the file point.
376     I = SLocEntryTable.begin()+LastFileIDLookup.ID;
377   }
378 
379   // Find the FileID that contains this.  "I" is an iterator that points to a
380   // FileID whose offset is known to be larger than SLocOffset.
381   unsigned NumProbes = 0;
382   while (1) {
383     --I;
384     if (I->getOffset() <= SLocOffset) {
385 #if 0
386       printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
387              I-SLocEntryTable.begin(),
388              I->isInstantiation() ? "inst" : "file",
389              LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
390 #endif
391       FileID Res = FileID::get(I-SLocEntryTable.begin());
392 
393       // If this isn't an instantiation, remember it.  We have good locality
394       // across FileID lookups.
395       if (!I->isInstantiation())
396         LastFileIDLookup = Res;
397       NumLinearScans += NumProbes+1;
398       return Res;
399     }
400     if (++NumProbes == 8)
401       break;
402   }
403 
404   // Convert "I" back into an index.  We know that it is an entry whose index is
405   // larger than the offset we are looking for.
406   unsigned GreaterIndex = I-SLocEntryTable.begin();
407   // LessIndex - This is the lower bound of the range that we're searching.
408   // We know that the offset corresponding to the FileID is is less than
409   // SLocOffset.
410   unsigned LessIndex = 0;
411   NumProbes = 0;
412   while (1) {
413     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
414     unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
415 
416     ++NumProbes;
417 
418     // If the offset of the midpoint is too large, chop the high side of the
419     // range to the midpoint.
420     if (MidOffset > SLocOffset) {
421       GreaterIndex = MiddleIndex;
422       continue;
423     }
424 
425     // If the middle index contains the value, succeed and return.
426     if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
427 #if 0
428       printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
429              I-SLocEntryTable.begin(),
430              I->isInstantiation() ? "inst" : "file",
431              LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
432 #endif
433       FileID Res = FileID::get(MiddleIndex);
434 
435       // If this isn't an instantiation, remember it.  We have good locality
436       // across FileID lookups.
437       if (!I->isInstantiation())
438         LastFileIDLookup = Res;
439       NumBinaryProbes += NumProbes;
440       return Res;
441     }
442 
443     // Otherwise, move the low-side up to the middle index.
444     LessIndex = MiddleIndex;
445   }
446 }
447 
448 SourceLocation SourceManager::
449 getInstantiationLocSlowCase(SourceLocation Loc) const {
450   do {
451     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
452     Loc = getSLocEntry(LocInfo.first).getInstantiation()
453                    .getInstantiationLocStart();
454     Loc = Loc.getFileLocWithOffset(LocInfo.second);
455   } while (!Loc.isFileID());
456 
457   return Loc;
458 }
459 
460 SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
461   do {
462     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
463     Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
464     Loc = Loc.getFileLocWithOffset(LocInfo.second);
465   } while (!Loc.isFileID());
466   return Loc;
467 }
468 
469 
470 std::pair<FileID, unsigned>
471 SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
472                                                      unsigned Offset) const {
473   // If this is an instantiation record, walk through all the instantiation
474   // points.
475   FileID FID;
476   SourceLocation Loc;
477   do {
478     Loc = E->getInstantiation().getInstantiationLocStart();
479 
480     FID = getFileID(Loc);
481     E = &getSLocEntry(FID);
482     Offset += Loc.getOffset()-E->getOffset();
483   } while (!Loc.isFileID());
484 
485   return std::make_pair(FID, Offset);
486 }
487 
488 std::pair<FileID, unsigned>
489 SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
490                                                 unsigned Offset) const {
491   // If this is an instantiation record, walk through all the instantiation
492   // points.
493   FileID FID;
494   SourceLocation Loc;
495   do {
496     Loc = E->getInstantiation().getSpellingLoc();
497 
498     FID = getFileID(Loc);
499     E = &getSLocEntry(FID);
500     Offset += Loc.getOffset()-E->getOffset();
501   } while (!Loc.isFileID());
502 
503   return std::make_pair(FID, Offset);
504 }
505 
506 /// getImmediateSpellingLoc - Given a SourceLocation object, return the
507 /// spelling location referenced by the ID.  This is the first level down
508 /// towards the place where the characters that make up the lexed token can be
509 /// found.  This should not generally be used by clients.
510 SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
511   if (Loc.isFileID()) return Loc;
512   std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
513   Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
514   return Loc.getFileLocWithOffset(LocInfo.second);
515 }
516 
517 
518 /// getImmediateInstantiationRange - Loc is required to be an instantiation
519 /// location.  Return the start/end of the instantiation information.
520 std::pair<SourceLocation,SourceLocation>
521 SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
522   assert(Loc.isMacroID() && "Not an instantiation loc!");
523   const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
524   return II.getInstantiationLocRange();
525 }
526 
527 /// getInstantiationRange - Given a SourceLocation object, return the
528 /// range of tokens covered by the instantiation in the ultimate file.
529 std::pair<SourceLocation,SourceLocation>
530 SourceManager::getInstantiationRange(SourceLocation Loc) const {
531   if (Loc.isFileID()) return std::make_pair(Loc, Loc);
532 
533   std::pair<SourceLocation,SourceLocation> Res =
534     getImmediateInstantiationRange(Loc);
535 
536   // Fully resolve the start and end locations to their ultimate instantiation
537   // points.
538   while (!Res.first.isFileID())
539     Res.first = getImmediateInstantiationRange(Res.first).first;
540   while (!Res.second.isFileID())
541     Res.second = getImmediateInstantiationRange(Res.second).second;
542   return Res;
543 }
544 
545 
546 
547 //===----------------------------------------------------------------------===//
548 // Queries about the code at a SourceLocation.
549 //===----------------------------------------------------------------------===//
550 
551 /// getCharacterData - Return a pointer to the start of the specified location
552 /// in the appropriate MemoryBuffer.
553 const char *SourceManager::getCharacterData(SourceLocation SL) const {
554   // Note that this is a hot function in the getSpelling() path, which is
555   // heavily used by -E mode.
556   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
557 
558   // Note that calling 'getBuffer()' may lazily page in a source file.
559   return getSLocEntry(LocInfo.first).getFile().getContentCache()
560               ->getBuffer()->getBufferStart() + LocInfo.second;
561 }
562 
563 
564 /// getColumnNumber - Return the column # for the specified file position.
565 /// this is significantly cheaper to compute than the line number.
566 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
567   const char *Buf = getBuffer(FID)->getBufferStart();
568 
569   unsigned LineStart = FilePos;
570   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
571     --LineStart;
572   return FilePos-LineStart+1;
573 }
574 
575 unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
576   if (Loc.isInvalid()) return 0;
577   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
578   return getColumnNumber(LocInfo.first, LocInfo.second);
579 }
580 
581 unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
582   if (Loc.isInvalid()) return 0;
583   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
584   return getColumnNumber(LocInfo.first, LocInfo.second);
585 }
586 
587 
588 
589 static void ComputeLineNumbers(ContentCache* FI,
590                                llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
591 static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
592   // Note that calling 'getBuffer()' may lazily page in the file.
593   const MemoryBuffer *Buffer = FI->getBuffer();
594 
595   // Find the file offsets of all of the *physical* source lines.  This does
596   // not look at trigraphs, escaped newlines, or anything else tricky.
597   std::vector<unsigned> LineOffsets;
598 
599   // Line #1 starts at char 0.
600   LineOffsets.push_back(0);
601 
602   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
603   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
604   unsigned Offs = 0;
605   while (1) {
606     // Skip over the contents of the line.
607     // TODO: Vectorize this?  This is very performance sensitive for programs
608     // with lots of diagnostics and in -E mode.
609     const unsigned char *NextBuf = (const unsigned char *)Buf;
610     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
611       ++NextBuf;
612     Offs += NextBuf-Buf;
613     Buf = NextBuf;
614 
615     if (Buf[0] == '\n' || Buf[0] == '\r') {
616       // If this is \n\r or \r\n, skip both characters.
617       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
618         ++Offs, ++Buf;
619       ++Offs, ++Buf;
620       LineOffsets.push_back(Offs);
621     } else {
622       // Otherwise, this is a null.  If end of file, exit.
623       if (Buf == End) break;
624       // Otherwise, skip the null.
625       ++Offs, ++Buf;
626     }
627   }
628 
629   // Copy the offsets into the FileInfo structure.
630   FI->NumLines = LineOffsets.size();
631   FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
632   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
633 }
634 
635 /// getLineNumber - Given a SourceLocation, return the spelling line number
636 /// for the position indicated.  This requires building and caching a table of
637 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
638 /// about to emit a diagnostic.
639 unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
640   ContentCache *Content;
641   if (LastLineNoFileIDQuery == FID)
642     Content = LastLineNoContentCache;
643   else
644     Content = const_cast<ContentCache*>(getSLocEntry(FID)
645                                         .getFile().getContentCache());
646 
647   // If this is the first use of line information for this buffer, compute the
648   /// SourceLineCache for it on demand.
649   if (Content->SourceLineCache == 0)
650     ComputeLineNumbers(Content, ContentCacheAlloc);
651 
652   // Okay, we know we have a line number table.  Do a binary search to find the
653   // line number that this character position lands on.
654   unsigned *SourceLineCache = Content->SourceLineCache;
655   unsigned *SourceLineCacheStart = SourceLineCache;
656   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
657 
658   unsigned QueriedFilePos = FilePos+1;
659 
660   // If the previous query was to the same file, we know both the file pos from
661   // that query and the line number returned.  This allows us to narrow the
662   // search space from the entire file to something near the match.
663   if (LastLineNoFileIDQuery == FID) {
664     if (QueriedFilePos >= LastLineNoFilePos) {
665       SourceLineCache = SourceLineCache+LastLineNoResult-1;
666 
667       // The query is likely to be nearby the previous one.  Here we check to
668       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
669       // where big comment blocks and vertical whitespace eat up lines but
670       // contribute no tokens.
671       if (SourceLineCache+5 < SourceLineCacheEnd) {
672         if (SourceLineCache[5] > QueriedFilePos)
673           SourceLineCacheEnd = SourceLineCache+5;
674         else if (SourceLineCache+10 < SourceLineCacheEnd) {
675           if (SourceLineCache[10] > QueriedFilePos)
676             SourceLineCacheEnd = SourceLineCache+10;
677           else if (SourceLineCache+20 < SourceLineCacheEnd) {
678             if (SourceLineCache[20] > QueriedFilePos)
679               SourceLineCacheEnd = SourceLineCache+20;
680           }
681         }
682       }
683     } else {
684       SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
685     }
686   }
687 
688   // If the spread is large, do a "radix" test as our initial guess, based on
689   // the assumption that lines average to approximately the same length.
690   // NOTE: This is currently disabled, as it does not appear to be profitable in
691   // initial measurements.
692   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
693     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
694 
695     // Take a stab at guessing where it is.
696     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
697 
698     // Check for -10 and +10 lines.
699     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
700     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
701 
702     // If the computed lower bound is less than the query location, move it in.
703     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
704         SourceLineCacheStart[LowerBound] < QueriedFilePos)
705       SourceLineCache = SourceLineCacheStart+LowerBound;
706 
707     // If the computed upper bound is greater than the query location, move it.
708     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
709         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
710       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
711   }
712 
713   unsigned *Pos
714     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
715   unsigned LineNo = Pos-SourceLineCacheStart;
716 
717   LastLineNoFileIDQuery = FID;
718   LastLineNoContentCache = Content;
719   LastLineNoFilePos = QueriedFilePos;
720   LastLineNoResult = LineNo;
721   return LineNo;
722 }
723 
724 unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
725   if (Loc.isInvalid()) return 0;
726   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
727   return getLineNumber(LocInfo.first, LocInfo.second);
728 }
729 unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
730   if (Loc.isInvalid()) return 0;
731   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
732   return getLineNumber(LocInfo.first, LocInfo.second);
733 }
734 
735 /// getFileCharacteristic - return the file characteristic of the specified
736 /// source location, indicating whether this is a normal file, a system
737 /// header, or an "implicit extern C" system header.
738 ///
739 /// This state can be modified with flags on GNU linemarker directives like:
740 ///   # 4 "foo.h" 3
741 /// which changes all source locations in the current file after that to be
742 /// considered to be from a system header.
743 SrcMgr::CharacteristicKind
744 SourceManager::getFileCharacteristic(SourceLocation Loc) const {
745   assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
746   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
747   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
748 
749   // If there are no #line directives in this file, just return the whole-file
750   // state.
751   if (!FI.hasLineDirectives())
752     return FI.getFileCharacteristic();
753 
754   assert(LineTable && "Can't have linetable entries without a LineTable!");
755   // See if there is a #line directive before the location.
756   const LineEntry *Entry =
757     LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
758 
759   // If this is before the first line marker, use the file characteristic.
760   if (!Entry)
761     return FI.getFileCharacteristic();
762 
763   return Entry->FileKind;
764 }
765 
766 /// Return the filename or buffer identifier of the buffer the location is in.
767 /// Note that this name does not respect #line directives.  Use getPresumedLoc
768 /// for normal clients.
769 const char *SourceManager::getBufferName(SourceLocation Loc) const {
770   if (Loc.isInvalid()) return "<invalid loc>";
771 
772   return getBuffer(getFileID(Loc))->getBufferIdentifier();
773 }
774 
775 
776 /// getPresumedLoc - This method returns the "presumed" location of a
777 /// SourceLocation specifies.  A "presumed location" can be modified by #line
778 /// or GNU line marker directives.  This provides a view on the data that a
779 /// user should see in diagnostics, for example.
780 ///
781 /// Note that a presumed location is always given as the instantiation point
782 /// of an instantiation location, not at the spelling location.
783 PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
784   if (Loc.isInvalid()) return PresumedLoc();
785 
786   // Presumed locations are always for instantiation points.
787   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
788 
789   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
790   const SrcMgr::ContentCache *C = FI.getContentCache();
791 
792   // To get the source name, first consult the FileEntry (if one exists)
793   // before the MemBuffer as this will avoid unnecessarily paging in the
794   // MemBuffer.
795   const char *Filename =
796     C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
797   unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
798   unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
799   SourceLocation IncludeLoc = FI.getIncludeLoc();
800 
801   // If we have #line directives in this file, update and overwrite the physical
802   // location info if appropriate.
803   if (FI.hasLineDirectives()) {
804     assert(LineTable && "Can't have linetable entries without a LineTable!");
805     // See if there is a #line directive before this.  If so, get it.
806     if (const LineEntry *Entry =
807           LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
808       // If the LineEntry indicates a filename, use it.
809       if (Entry->FilenameID != -1)
810         Filename = LineTable->getFilename(Entry->FilenameID);
811 
812       // Use the line number specified by the LineEntry.  This line number may
813       // be multiple lines down from the line entry.  Add the difference in
814       // physical line numbers from the query point and the line marker to the
815       // total.
816       unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
817       LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
818 
819       // Note that column numbers are not molested by line markers.
820 
821       // Handle virtual #include manipulation.
822       if (Entry->IncludeOffset) {
823         IncludeLoc = getLocForStartOfFile(LocInfo.first);
824         IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
825       }
826     }
827   }
828 
829   return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
830 }
831 
832 //===----------------------------------------------------------------------===//
833 // Other miscellaneous methods.
834 //===----------------------------------------------------------------------===//
835 
836 
837 /// PrintStats - Print statistics to stderr.
838 ///
839 void SourceManager::PrintStats() const {
840   llvm::cerr << "\n*** Source Manager Stats:\n";
841   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
842              << " mem buffers mapped.\n";
843   llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
844              << NextOffset << "B of Sloc address space used.\n";
845 
846   unsigned NumLineNumsComputed = 0;
847   unsigned NumFileBytesMapped = 0;
848   for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
849     NumLineNumsComputed += I->second->SourceLineCache != 0;
850     NumFileBytesMapped  += I->second->getSizeBytesMapped();
851   }
852 
853   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
854              << NumLineNumsComputed << " files with line #'s computed.\n";
855   llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
856              << NumBinaryProbes << " binary.\n";
857 }
858 
859 //===----------------------------------------------------------------------===//
860 // Serialization.
861 //===----------------------------------------------------------------------===//
862 
863 void ContentCache::Emit(llvm::Serializer& S) const {
864   S.FlushRecord();
865   S.EmitPtr(this);
866 
867   if (Entry) {
868     llvm::sys::Path Fname(Buffer->getBufferIdentifier());
869 
870     if (Fname.isAbsolute())
871       S.EmitCStr(Fname.c_str());
872     else {
873       // Create an absolute path.
874       // FIXME: This will potentially contain ".." and "." in the path.
875       llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
876       path.appendComponent(Fname.c_str());
877       S.EmitCStr(path.c_str());
878     }
879   }
880   else {
881     const char* p = Buffer->getBufferStart();
882     const char* e = Buffer->getBufferEnd();
883 
884     S.EmitInt(e-p);
885 
886     for ( ; p != e; ++p)
887       S.EmitInt(*p);
888   }
889 
890   S.FlushRecord();
891 }
892 
893 void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
894                                        SourceManager& SMgr,
895                                        FileManager* FMgr,
896                                        std::vector<char>& Buf) {
897   if (FMgr) {
898     llvm::SerializedPtrID PtrID = D.ReadPtrID();
899     D.ReadCStr(Buf,false);
900 
901     // Create/fetch the FileEntry.
902     const char* start = &Buf[0];
903     const FileEntry* E = FMgr->getFile(start,start+Buf.size());
904 
905     // FIXME: Ideally we want a lazy materialization of the ContentCache
906     //  anyway, because we don't want to read in source files unless this
907     //  is absolutely needed.
908     if (!E)
909       D.RegisterPtr(PtrID,NULL);
910     else
911       // Get the ContextCache object and register it with the deserializer.
912       D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
913     return;
914   }
915 
916   // Register the ContextCache object with the deserializer.
917   /* FIXME:
918   ContentCache *Entry
919   SMgr.MemBufferInfos.push_back(ContentCache());
920    = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
921   D.RegisterPtr(&Entry);
922 
923   // Create the buffer.
924   unsigned Size = D.ReadInt();
925   Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
926 
927   // Read the contents of the buffer.
928   char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
929   for (unsigned i = 0; i < Size ; ++i)
930     p[i] = D.ReadInt();
931    */
932 }
933 
934 void SourceManager::Emit(llvm::Serializer& S) const {
935   S.EnterBlock();
936   S.EmitPtr(this);
937   S.EmitInt(MainFileID.getOpaqueValue());
938 
939   // Emit: FileInfos.  Just emit the file name.
940   S.EnterBlock();
941 
942   // FIXME: Emit FileInfos.
943   //std::for_each(FileInfos.begin(), FileInfos.end(),
944   //              S.MakeEmitter<ContentCache>());
945 
946   S.ExitBlock();
947 
948   // Emit: MemBufferInfos
949   S.EnterBlock();
950 
951   /* FIXME: EMIT.
952   std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
953                 S.MakeEmitter<ContentCache>());
954    */
955 
956   S.ExitBlock();
957 
958   // FIXME: Emit SLocEntryTable.
959 
960   S.ExitBlock();
961 }
962 
963 SourceManager*
964 SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
965   SourceManager *M = new SourceManager();
966   D.RegisterPtr(M);
967 
968   // Read: the FileID of the main source file of the translation unit.
969   M->MainFileID = FileID::get(D.ReadInt());
970 
971   std::vector<char> Buf;
972 
973   /*{ // FIXME Read: FileInfos.
974     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
975     while (!D.FinishedBlock(BLoc))
976     ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
977   }*/
978 
979   /*{ // FIXME Read: MemBufferInfos.
980     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
981     while (!D.FinishedBlock(BLoc))
982     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
983     }*/
984 
985   // FIXME: Read SLocEntryTable.
986 
987   return M;
988 }
989