xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 4c7626e7b6824fd088f1ac1a496a13e9d3886089)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/SourceManagerInternals.h"
16 #include "clang/Basic/FileManager.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/System/Path.h"
20 #include "llvm/Bitcode/Serialize.h"
21 #include "llvm/Bitcode/Deserialize.h"
22 #include "llvm/Support/Streams.h"
23 #include <algorithm>
24 using namespace clang;
25 using namespace SrcMgr;
26 using llvm::MemoryBuffer;
27 
28 //===----------------------------------------------------------------------===//
29 // SourceManager Helper Classes
30 //===----------------------------------------------------------------------===//
31 
32 ContentCache::~ContentCache() {
33   delete Buffer;
34 }
35 
36 /// getSizeBytesMapped - Returns the number of bytes actually mapped for
37 ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
38 ///  instantiated.
39 unsigned ContentCache::getSizeBytesMapped() const {
40   return Buffer ? Buffer->getBufferSize() : 0;
41 }
42 
43 /// getSize - Returns the size of the content encapsulated by this ContentCache.
44 ///  This can be the size of the source file or the size of an arbitrary
45 ///  scratch buffer.  If the ContentCache encapsulates a source file, that
46 ///  file is not lazily brought in from disk to satisfy this query.
47 unsigned ContentCache::getSize() const {
48   return Entry ? Entry->getSize() : Buffer->getBufferSize();
49 }
50 
51 const llvm::MemoryBuffer *ContentCache::getBuffer() const {
52   // Lazily create the Buffer for ContentCaches that wrap files.
53   if (!Buffer && Entry) {
54     // FIXME: Should we support a way to not have to do this check over
55     //   and over if we cannot open the file?
56     Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
57   }
58   return Buffer;
59 }
60 
61 unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) {
62   // Look up the filename in the string table, returning the pre-existing value
63   // if it exists.
64   llvm::StringMapEntry<unsigned> &Entry =
65     FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U);
66   if (Entry.getValue() != ~0U)
67     return Entry.getValue();
68 
69   // Otherwise, assign this the next available ID.
70   Entry.setValue(FilenamesByID.size());
71   FilenamesByID.push_back(&Entry);
72   return FilenamesByID.size()-1;
73 }
74 
75 /// AddLineNote - Add a line note to the line table that indicates that there
76 /// is a #line at the specified FID/Offset location which changes the presumed
77 /// location to LineNo/FilenameID.
78 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
79                                 unsigned LineNo, int FilenameID) {
80   std::vector<LineEntry> &Entries = LineEntries[FID];
81 
82   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
83          "Adding line entries out of order!");
84 
85   SrcMgr::CharacteristicKind Kind = SrcMgr::C_User;
86   unsigned IncludeOffset = 0;
87 
88   if (!Entries.empty()) {
89     // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember
90     // that we are still in "foo.h".
91     if (FilenameID == -1)
92       FilenameID = Entries.back().FilenameID;
93 
94     // If we are after a line marker that switched us to system header mode, or
95     // that set #include information, preserve it.
96     Kind = Entries.back().FileKind;
97     IncludeOffset = Entries.back().IncludeOffset;
98   }
99 
100   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind,
101                                    IncludeOffset));
102 }
103 
104 /// AddLineNote This is the same as the previous version of AddLineNote, but is
105 /// used for GNU line markers.  If EntryExit is 0, then this doesn't change the
106 /// presumed #include stack.  If it is 1, this is a file entry, if it is 2 then
107 /// this is a file exit.  FileKind specifies whether this is a system header or
108 /// extern C system header.
109 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset,
110                                 unsigned LineNo, int FilenameID,
111                                 unsigned EntryExit,
112                                 SrcMgr::CharacteristicKind FileKind) {
113   assert(FilenameID != -1 && "Unspecified filename should use other accessor");
114 
115   std::vector<LineEntry> &Entries = LineEntries[FID];
116 
117   assert((Entries.empty() || Entries.back().FileOffset < Offset) &&
118          "Adding line entries out of order!");
119 
120   unsigned IncludeOffset = 0;
121   if (EntryExit == 0) {  // No #include stack change.
122     IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset;
123   } else if (EntryExit == 1) {
124     IncludeOffset = Offset-1;
125   } else if (EntryExit == 2) {
126     assert(!Entries.empty() && Entries.back().IncludeOffset &&
127        "PPDirectives should have caught case when popping empty include stack");
128 
129     // Get the include loc of the last entries' include loc as our include loc.
130     IncludeOffset = 0;
131     if (const LineEntry *PrevEntry =
132           FindNearestLineEntry(FID, Entries.back().IncludeOffset))
133       IncludeOffset = PrevEntry->IncludeOffset;
134   }
135 
136   Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind,
137                                    IncludeOffset));
138 }
139 
140 
141 /// FindNearestLineEntry - Find the line entry nearest to FID that is before
142 /// it.  If there is no line entry before Offset in FID, return null.
143 const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID,
144                                                      unsigned Offset) {
145   const std::vector<LineEntry> &Entries = LineEntries[FID];
146   assert(!Entries.empty() && "No #line entries for this FID after all!");
147 
148   // It is very common for the query to be after the last #line, check this
149   // first.
150   if (Entries.back().FileOffset <= Offset)
151     return &Entries.back();
152 
153   // Do a binary search to find the maximal element that is still before Offset.
154   std::vector<LineEntry>::const_iterator I =
155     std::upper_bound(Entries.begin(), Entries.end(), Offset);
156   if (I == Entries.begin()) return 0;
157   return &*--I;
158 }
159 
160 /// \brief Add a new line entry that has already been encoded into
161 /// the internal representation of the line table.
162 void LineTableInfo::AddEntry(unsigned FID,
163                              const std::vector<LineEntry> &Entries) {
164   LineEntries[FID] = Entries;
165 }
166 
167 /// getLineTableFilenameID - Return the uniqued ID for the specified filename.
168 ///
169 unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) {
170   if (LineTable == 0)
171     LineTable = new LineTableInfo();
172   return LineTable->getLineTableFilenameID(Ptr, Len);
173 }
174 
175 
176 /// AddLineNote - Add a line note to the line table for the FileID and offset
177 /// specified by Loc.  If FilenameID is -1, it is considered to be
178 /// unspecified.
179 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
180                                 int FilenameID) {
181   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
182 
183   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
184 
185   // Remember that this file has #line directives now if it doesn't already.
186   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
187 
188   if (LineTable == 0)
189     LineTable = new LineTableInfo();
190   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID);
191 }
192 
193 /// AddLineNote - Add a GNU line marker to the line table.
194 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo,
195                                 int FilenameID, bool IsFileEntry,
196                                 bool IsFileExit, bool IsSystemHeader,
197                                 bool IsExternCHeader) {
198   // If there is no filename and no flags, this is treated just like a #line,
199   // which does not change the flags of the previous line marker.
200   if (FilenameID == -1) {
201     assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader &&
202            "Can't set flags without setting the filename!");
203     return AddLineNote(Loc, LineNo, FilenameID);
204   }
205 
206   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
207   const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile();
208 
209   // Remember that this file has #line directives now if it doesn't already.
210   const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives();
211 
212   if (LineTable == 0)
213     LineTable = new LineTableInfo();
214 
215   SrcMgr::CharacteristicKind FileKind;
216   if (IsExternCHeader)
217     FileKind = SrcMgr::C_ExternCSystem;
218   else if (IsSystemHeader)
219     FileKind = SrcMgr::C_System;
220   else
221     FileKind = SrcMgr::C_User;
222 
223   unsigned EntryExit = 0;
224   if (IsFileEntry)
225     EntryExit = 1;
226   else if (IsFileExit)
227     EntryExit = 2;
228 
229   LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID,
230                          EntryExit, FileKind);
231 }
232 
233 LineTableInfo &SourceManager::getLineTable() {
234   if (LineTable == 0)
235     LineTable = new LineTableInfo();
236   return *LineTable;
237 }
238 
239 //===----------------------------------------------------------------------===//
240 // Private 'Create' methods.
241 //===----------------------------------------------------------------------===//
242 
243 SourceManager::~SourceManager() {
244   delete LineTable;
245 
246   // Delete FileEntry objects corresponding to content caches.  Since the actual
247   // content cache objects are bump pointer allocated, we just have to run the
248   // dtors, but we call the deallocate method for completeness.
249   for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) {
250     MemBufferInfos[i]->~ContentCache();
251     ContentCacheAlloc.Deallocate(MemBufferInfos[i]);
252   }
253   for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator
254        I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
255     I->second->~ContentCache();
256     ContentCacheAlloc.Deallocate(I->second);
257   }
258 }
259 
260 void SourceManager::clearIDTables() {
261   MainFileID = FileID();
262   SLocEntryTable.clear();
263   LastLineNoFileIDQuery = FileID();
264   LastLineNoContentCache = 0;
265   LastFileIDLookup = FileID();
266 
267   if (LineTable)
268     LineTable->clear();
269 
270   // Use up FileID #0 as an invalid instantiation.
271   NextOffset = 0;
272   createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1);
273 }
274 
275 /// getOrCreateContentCache - Create or return a cached ContentCache for the
276 /// specified file.
277 const ContentCache *
278 SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) {
279   assert(FileEnt && "Didn't specify a file entry to use?");
280 
281   // Do we already have information about this file?
282   ContentCache *&Entry = FileInfos[FileEnt];
283   if (Entry) return Entry;
284 
285   // Nope, create a new Cache entry.  Make sure it is at least 8-byte aligned
286   // so that FileInfo can use the low 3 bits of the pointer for its own
287   // nefarious purposes.
288   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
289   EntryAlign = std::max(8U, EntryAlign);
290   Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
291   new (Entry) ContentCache(FileEnt);
292   return Entry;
293 }
294 
295 
296 /// createMemBufferContentCache - Create a new ContentCache for the specified
297 ///  memory buffer.  This does no caching.
298 const ContentCache*
299 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
300   // Add a new ContentCache to the MemBufferInfos list and return it.  Make sure
301   // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of
302   // the pointer for its own nefarious purposes.
303   unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment;
304   EntryAlign = std::max(8U, EntryAlign);
305   ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign);
306   new (Entry) ContentCache();
307   MemBufferInfos.push_back(Entry);
308   Entry->setBuffer(Buffer);
309   return Entry;
310 }
311 
312 //===----------------------------------------------------------------------===//
313 // Methods to create new FileID's and instantiations.
314 //===----------------------------------------------------------------------===//
315 
316 /// createFileID - Create a new fileID for the specified ContentCache and
317 /// include position.  This works regardless of whether the ContentCache
318 /// corresponds to a file or some other input source.
319 FileID SourceManager::createFileID(const ContentCache *File,
320                                    SourceLocation IncludePos,
321                                    SrcMgr::CharacteristicKind FileCharacter) {
322   SLocEntryTable.push_back(SLocEntry::get(NextOffset,
323                                           FileInfo::get(IncludePos, File,
324                                                         FileCharacter)));
325   unsigned FileSize = File->getSize();
326   assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!");
327   NextOffset += FileSize+1;
328 
329   // Set LastFileIDLookup to the newly created file.  The next getFileID call is
330   // almost guaranteed to be from that file.
331   return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1);
332 }
333 
334 /// createInstantiationLoc - Return a new SourceLocation that encodes the fact
335 /// that a token from SpellingLoc should actually be referenced from
336 /// InstantiationLoc.
337 SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc,
338                                                      SourceLocation ILocStart,
339                                                      SourceLocation ILocEnd,
340                                                      unsigned TokLength) {
341   InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc);
342   SLocEntryTable.push_back(SLocEntry::get(NextOffset, II));
343   assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!");
344   NextOffset += TokLength+1;
345   return SourceLocation::getMacroLoc(NextOffset-(TokLength+1));
346 }
347 
348 /// getBufferData - Return a pointer to the start and end of the source buffer
349 /// data for the specified FileID.
350 std::pair<const char*, const char*>
351 SourceManager::getBufferData(FileID FID) const {
352   const llvm::MemoryBuffer *Buf = getBuffer(FID);
353   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
354 }
355 
356 
357 //===----------------------------------------------------------------------===//
358 // SourceLocation manipulation methods.
359 //===----------------------------------------------------------------------===//
360 
361 /// getFileIDSlow - Return the FileID for a SourceLocation.  This is a very hot
362 /// method that is used for all SourceManager queries that start with a
363 /// SourceLocation object.  It is responsible for finding the entry in
364 /// SLocEntryTable which contains the specified location.
365 ///
366 FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const {
367   assert(SLocOffset && "Invalid FileID");
368 
369   // After the first and second level caches, I see two common sorts of
370   // behavior: 1) a lot of searched FileID's are "near" the cached file location
371   // or are "near" the cached instantiation location.  2) others are just
372   // completely random and may be a very long way away.
373   //
374   // To handle this, we do a linear search for up to 8 steps to catch #1 quickly
375   // then we fall back to a less cache efficient, but more scalable, binary
376   // search to find the location.
377 
378   // See if this is near the file point - worst case we start scanning from the
379   // most newly created FileID.
380   std::vector<SrcMgr::SLocEntry>::const_iterator I;
381 
382   if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) {
383     // Neither loc prunes our search.
384     I = SLocEntryTable.end();
385   } else {
386     // Perhaps it is near the file point.
387     I = SLocEntryTable.begin()+LastFileIDLookup.ID;
388   }
389 
390   // Find the FileID that contains this.  "I" is an iterator that points to a
391   // FileID whose offset is known to be larger than SLocOffset.
392   unsigned NumProbes = 0;
393   while (1) {
394     --I;
395     if (I->getOffset() <= SLocOffset) {
396 #if 0
397       printf("lin %d -> %d [%s] %d %d\n", SLocOffset,
398              I-SLocEntryTable.begin(),
399              I->isInstantiation() ? "inst" : "file",
400              LastFileIDLookup.ID,  int(SLocEntryTable.end()-I));
401 #endif
402       FileID Res = FileID::get(I-SLocEntryTable.begin());
403 
404       // If this isn't an instantiation, remember it.  We have good locality
405       // across FileID lookups.
406       if (!I->isInstantiation())
407         LastFileIDLookup = Res;
408       NumLinearScans += NumProbes+1;
409       return Res;
410     }
411     if (++NumProbes == 8)
412       break;
413   }
414 
415   // Convert "I" back into an index.  We know that it is an entry whose index is
416   // larger than the offset we are looking for.
417   unsigned GreaterIndex = I-SLocEntryTable.begin();
418   // LessIndex - This is the lower bound of the range that we're searching.
419   // We know that the offset corresponding to the FileID is is less than
420   // SLocOffset.
421   unsigned LessIndex = 0;
422   NumProbes = 0;
423   while (1) {
424     unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex;
425     unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset();
426 
427     ++NumProbes;
428 
429     // If the offset of the midpoint is too large, chop the high side of the
430     // range to the midpoint.
431     if (MidOffset > SLocOffset) {
432       GreaterIndex = MiddleIndex;
433       continue;
434     }
435 
436     // If the middle index contains the value, succeed and return.
437     if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) {
438 #if 0
439       printf("bin %d -> %d [%s] %d %d\n", SLocOffset,
440              I-SLocEntryTable.begin(),
441              I->isInstantiation() ? "inst" : "file",
442              LastFileIDLookup.ID, int(SLocEntryTable.end()-I));
443 #endif
444       FileID Res = FileID::get(MiddleIndex);
445 
446       // If this isn't an instantiation, remember it.  We have good locality
447       // across FileID lookups.
448       if (!I->isInstantiation())
449         LastFileIDLookup = Res;
450       NumBinaryProbes += NumProbes;
451       return Res;
452     }
453 
454     // Otherwise, move the low-side up to the middle index.
455     LessIndex = MiddleIndex;
456   }
457 }
458 
459 SourceLocation SourceManager::
460 getInstantiationLocSlowCase(SourceLocation Loc) const {
461   do {
462     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
463     Loc = getSLocEntry(LocInfo.first).getInstantiation()
464                    .getInstantiationLocStart();
465     Loc = Loc.getFileLocWithOffset(LocInfo.second);
466   } while (!Loc.isFileID());
467 
468   return Loc;
469 }
470 
471 SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const {
472   do {
473     std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
474     Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
475     Loc = Loc.getFileLocWithOffset(LocInfo.second);
476   } while (!Loc.isFileID());
477   return Loc;
478 }
479 
480 
481 std::pair<FileID, unsigned>
482 SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E,
483                                                      unsigned Offset) const {
484   // If this is an instantiation record, walk through all the instantiation
485   // points.
486   FileID FID;
487   SourceLocation Loc;
488   do {
489     Loc = E->getInstantiation().getInstantiationLocStart();
490 
491     FID = getFileID(Loc);
492     E = &getSLocEntry(FID);
493     Offset += Loc.getOffset()-E->getOffset();
494   } while (!Loc.isFileID());
495 
496   return std::make_pair(FID, Offset);
497 }
498 
499 std::pair<FileID, unsigned>
500 SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E,
501                                                 unsigned Offset) const {
502   // If this is an instantiation record, walk through all the instantiation
503   // points.
504   FileID FID;
505   SourceLocation Loc;
506   do {
507     Loc = E->getInstantiation().getSpellingLoc();
508 
509     FID = getFileID(Loc);
510     E = &getSLocEntry(FID);
511     Offset += Loc.getOffset()-E->getOffset();
512   } while (!Loc.isFileID());
513 
514   return std::make_pair(FID, Offset);
515 }
516 
517 /// getImmediateSpellingLoc - Given a SourceLocation object, return the
518 /// spelling location referenced by the ID.  This is the first level down
519 /// towards the place where the characters that make up the lexed token can be
520 /// found.  This should not generally be used by clients.
521 SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{
522   if (Loc.isFileID()) return Loc;
523   std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc);
524   Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc();
525   return Loc.getFileLocWithOffset(LocInfo.second);
526 }
527 
528 
529 /// getImmediateInstantiationRange - Loc is required to be an instantiation
530 /// location.  Return the start/end of the instantiation information.
531 std::pair<SourceLocation,SourceLocation>
532 SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const {
533   assert(Loc.isMacroID() && "Not an instantiation loc!");
534   const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation();
535   return II.getInstantiationLocRange();
536 }
537 
538 /// getInstantiationRange - Given a SourceLocation object, return the
539 /// range of tokens covered by the instantiation in the ultimate file.
540 std::pair<SourceLocation,SourceLocation>
541 SourceManager::getInstantiationRange(SourceLocation Loc) const {
542   if (Loc.isFileID()) return std::make_pair(Loc, Loc);
543 
544   std::pair<SourceLocation,SourceLocation> Res =
545     getImmediateInstantiationRange(Loc);
546 
547   // Fully resolve the start and end locations to their ultimate instantiation
548   // points.
549   while (!Res.first.isFileID())
550     Res.first = getImmediateInstantiationRange(Res.first).first;
551   while (!Res.second.isFileID())
552     Res.second = getImmediateInstantiationRange(Res.second).second;
553   return Res;
554 }
555 
556 
557 
558 //===----------------------------------------------------------------------===//
559 // Queries about the code at a SourceLocation.
560 //===----------------------------------------------------------------------===//
561 
562 /// getCharacterData - Return a pointer to the start of the specified location
563 /// in the appropriate MemoryBuffer.
564 const char *SourceManager::getCharacterData(SourceLocation SL) const {
565   // Note that this is a hot function in the getSpelling() path, which is
566   // heavily used by -E mode.
567   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL);
568 
569   // Note that calling 'getBuffer()' may lazily page in a source file.
570   return getSLocEntry(LocInfo.first).getFile().getContentCache()
571               ->getBuffer()->getBufferStart() + LocInfo.second;
572 }
573 
574 
575 /// getColumnNumber - Return the column # for the specified file position.
576 /// this is significantly cheaper to compute than the line number.
577 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const {
578   const char *Buf = getBuffer(FID)->getBufferStart();
579 
580   unsigned LineStart = FilePos;
581   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
582     --LineStart;
583   return FilePos-LineStart+1;
584 }
585 
586 unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const {
587   if (Loc.isInvalid()) return 0;
588   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
589   return getColumnNumber(LocInfo.first, LocInfo.second);
590 }
591 
592 unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const {
593   if (Loc.isInvalid()) return 0;
594   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
595   return getColumnNumber(LocInfo.first, LocInfo.second);
596 }
597 
598 
599 
600 static void ComputeLineNumbers(ContentCache* FI,
601                                llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE;
602 static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){
603   // Note that calling 'getBuffer()' may lazily page in the file.
604   const MemoryBuffer *Buffer = FI->getBuffer();
605 
606   // Find the file offsets of all of the *physical* source lines.  This does
607   // not look at trigraphs, escaped newlines, or anything else tricky.
608   std::vector<unsigned> LineOffsets;
609 
610   // Line #1 starts at char 0.
611   LineOffsets.push_back(0);
612 
613   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
614   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
615   unsigned Offs = 0;
616   while (1) {
617     // Skip over the contents of the line.
618     // TODO: Vectorize this?  This is very performance sensitive for programs
619     // with lots of diagnostics and in -E mode.
620     const unsigned char *NextBuf = (const unsigned char *)Buf;
621     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
622       ++NextBuf;
623     Offs += NextBuf-Buf;
624     Buf = NextBuf;
625 
626     if (Buf[0] == '\n' || Buf[0] == '\r') {
627       // If this is \n\r or \r\n, skip both characters.
628       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
629         ++Offs, ++Buf;
630       ++Offs, ++Buf;
631       LineOffsets.push_back(Offs);
632     } else {
633       // Otherwise, this is a null.  If end of file, exit.
634       if (Buf == End) break;
635       // Otherwise, skip the null.
636       ++Offs, ++Buf;
637     }
638   }
639 
640   // Copy the offsets into the FileInfo structure.
641   FI->NumLines = LineOffsets.size();
642   FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size());
643   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
644 }
645 
646 /// getLineNumber - Given a SourceLocation, return the spelling line number
647 /// for the position indicated.  This requires building and caching a table of
648 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
649 /// about to emit a diagnostic.
650 unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const {
651   ContentCache *Content;
652   if (LastLineNoFileIDQuery == FID)
653     Content = LastLineNoContentCache;
654   else
655     Content = const_cast<ContentCache*>(getSLocEntry(FID)
656                                         .getFile().getContentCache());
657 
658   // If this is the first use of line information for this buffer, compute the
659   /// SourceLineCache for it on demand.
660   if (Content->SourceLineCache == 0)
661     ComputeLineNumbers(Content, ContentCacheAlloc);
662 
663   // Okay, we know we have a line number table.  Do a binary search to find the
664   // line number that this character position lands on.
665   unsigned *SourceLineCache = Content->SourceLineCache;
666   unsigned *SourceLineCacheStart = SourceLineCache;
667   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
668 
669   unsigned QueriedFilePos = FilePos+1;
670 
671   // If the previous query was to the same file, we know both the file pos from
672   // that query and the line number returned.  This allows us to narrow the
673   // search space from the entire file to something near the match.
674   if (LastLineNoFileIDQuery == FID) {
675     if (QueriedFilePos >= LastLineNoFilePos) {
676       SourceLineCache = SourceLineCache+LastLineNoResult-1;
677 
678       // The query is likely to be nearby the previous one.  Here we check to
679       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
680       // where big comment blocks and vertical whitespace eat up lines but
681       // contribute no tokens.
682       if (SourceLineCache+5 < SourceLineCacheEnd) {
683         if (SourceLineCache[5] > QueriedFilePos)
684           SourceLineCacheEnd = SourceLineCache+5;
685         else if (SourceLineCache+10 < SourceLineCacheEnd) {
686           if (SourceLineCache[10] > QueriedFilePos)
687             SourceLineCacheEnd = SourceLineCache+10;
688           else if (SourceLineCache+20 < SourceLineCacheEnd) {
689             if (SourceLineCache[20] > QueriedFilePos)
690               SourceLineCacheEnd = SourceLineCache+20;
691           }
692         }
693       }
694     } else {
695       SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
696     }
697   }
698 
699   // If the spread is large, do a "radix" test as our initial guess, based on
700   // the assumption that lines average to approximately the same length.
701   // NOTE: This is currently disabled, as it does not appear to be profitable in
702   // initial measurements.
703   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
704     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
705 
706     // Take a stab at guessing where it is.
707     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
708 
709     // Check for -10 and +10 lines.
710     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
711     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
712 
713     // If the computed lower bound is less than the query location, move it in.
714     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
715         SourceLineCacheStart[LowerBound] < QueriedFilePos)
716       SourceLineCache = SourceLineCacheStart+LowerBound;
717 
718     // If the computed upper bound is greater than the query location, move it.
719     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
720         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
721       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
722   }
723 
724   unsigned *Pos
725     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
726   unsigned LineNo = Pos-SourceLineCacheStart;
727 
728   LastLineNoFileIDQuery = FID;
729   LastLineNoContentCache = Content;
730   LastLineNoFilePos = QueriedFilePos;
731   LastLineNoResult = LineNo;
732   return LineNo;
733 }
734 
735 unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const {
736   if (Loc.isInvalid()) return 0;
737   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
738   return getLineNumber(LocInfo.first, LocInfo.second);
739 }
740 unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const {
741   if (Loc.isInvalid()) return 0;
742   std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc);
743   return getLineNumber(LocInfo.first, LocInfo.second);
744 }
745 
746 /// getFileCharacteristic - return the file characteristic of the specified
747 /// source location, indicating whether this is a normal file, a system
748 /// header, or an "implicit extern C" system header.
749 ///
750 /// This state can be modified with flags on GNU linemarker directives like:
751 ///   # 4 "foo.h" 3
752 /// which changes all source locations in the current file after that to be
753 /// considered to be from a system header.
754 SrcMgr::CharacteristicKind
755 SourceManager::getFileCharacteristic(SourceLocation Loc) const {
756   assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!");
757   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
758   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
759 
760   // If there are no #line directives in this file, just return the whole-file
761   // state.
762   if (!FI.hasLineDirectives())
763     return FI.getFileCharacteristic();
764 
765   assert(LineTable && "Can't have linetable entries without a LineTable!");
766   // See if there is a #line directive before the location.
767   const LineEntry *Entry =
768     LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second);
769 
770   // If this is before the first line marker, use the file characteristic.
771   if (!Entry)
772     return FI.getFileCharacteristic();
773 
774   return Entry->FileKind;
775 }
776 
777 /// Return the filename or buffer identifier of the buffer the location is in.
778 /// Note that this name does not respect #line directives.  Use getPresumedLoc
779 /// for normal clients.
780 const char *SourceManager::getBufferName(SourceLocation Loc) const {
781   if (Loc.isInvalid()) return "<invalid loc>";
782 
783   return getBuffer(getFileID(Loc))->getBufferIdentifier();
784 }
785 
786 
787 /// getPresumedLoc - This method returns the "presumed" location of a
788 /// SourceLocation specifies.  A "presumed location" can be modified by #line
789 /// or GNU line marker directives.  This provides a view on the data that a
790 /// user should see in diagnostics, for example.
791 ///
792 /// Note that a presumed location is always given as the instantiation point
793 /// of an instantiation location, not at the spelling location.
794 PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const {
795   if (Loc.isInvalid()) return PresumedLoc();
796 
797   // Presumed locations are always for instantiation points.
798   std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc);
799 
800   const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile();
801   const SrcMgr::ContentCache *C = FI.getContentCache();
802 
803   // To get the source name, first consult the FileEntry (if one exists)
804   // before the MemBuffer as this will avoid unnecessarily paging in the
805   // MemBuffer.
806   const char *Filename =
807     C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
808   unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second);
809   unsigned ColNo  = getColumnNumber(LocInfo.first, LocInfo.second);
810   SourceLocation IncludeLoc = FI.getIncludeLoc();
811 
812   // If we have #line directives in this file, update and overwrite the physical
813   // location info if appropriate.
814   if (FI.hasLineDirectives()) {
815     assert(LineTable && "Can't have linetable entries without a LineTable!");
816     // See if there is a #line directive before this.  If so, get it.
817     if (const LineEntry *Entry =
818           LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) {
819       // If the LineEntry indicates a filename, use it.
820       if (Entry->FilenameID != -1)
821         Filename = LineTable->getFilename(Entry->FilenameID);
822 
823       // Use the line number specified by the LineEntry.  This line number may
824       // be multiple lines down from the line entry.  Add the difference in
825       // physical line numbers from the query point and the line marker to the
826       // total.
827       unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset);
828       LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1);
829 
830       // Note that column numbers are not molested by line markers.
831 
832       // Handle virtual #include manipulation.
833       if (Entry->IncludeOffset) {
834         IncludeLoc = getLocForStartOfFile(LocInfo.first);
835         IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset);
836       }
837     }
838   }
839 
840   return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc);
841 }
842 
843 //===----------------------------------------------------------------------===//
844 // Other miscellaneous methods.
845 //===----------------------------------------------------------------------===//
846 
847 
848 /// PrintStats - Print statistics to stderr.
849 ///
850 void SourceManager::PrintStats() const {
851   llvm::cerr << "\n*** Source Manager Stats:\n";
852   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
853              << " mem buffers mapped.\n";
854   llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, "
855              << NextOffset << "B of Sloc address space used.\n";
856 
857   unsigned NumLineNumsComputed = 0;
858   unsigned NumFileBytesMapped = 0;
859   for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){
860     NumLineNumsComputed += I->second->SourceLineCache != 0;
861     NumFileBytesMapped  += I->second->getSizeBytesMapped();
862   }
863 
864   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
865              << NumLineNumsComputed << " files with line #'s computed.\n";
866   llvm::cerr << "FileID scans: " << NumLinearScans << " linear, "
867              << NumBinaryProbes << " binary.\n";
868 }
869 
870 //===----------------------------------------------------------------------===//
871 // Serialization.
872 //===----------------------------------------------------------------------===//
873 
874 void ContentCache::Emit(llvm::Serializer& S) const {
875   S.FlushRecord();
876   S.EmitPtr(this);
877 
878   if (Entry) {
879     llvm::sys::Path Fname(Buffer->getBufferIdentifier());
880 
881     if (Fname.isAbsolute())
882       S.EmitCStr(Fname.c_str());
883     else {
884       // Create an absolute path.
885       // FIXME: This will potentially contain ".." and "." in the path.
886       llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
887       path.appendComponent(Fname.c_str());
888       S.EmitCStr(path.c_str());
889     }
890   }
891   else {
892     const char* p = Buffer->getBufferStart();
893     const char* e = Buffer->getBufferEnd();
894 
895     S.EmitInt(e-p);
896 
897     for ( ; p != e; ++p)
898       S.EmitInt(*p);
899   }
900 
901   S.FlushRecord();
902 }
903 
904 void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
905                                        SourceManager& SMgr,
906                                        FileManager* FMgr,
907                                        std::vector<char>& Buf) {
908   if (FMgr) {
909     llvm::SerializedPtrID PtrID = D.ReadPtrID();
910     D.ReadCStr(Buf,false);
911 
912     // Create/fetch the FileEntry.
913     const char* start = &Buf[0];
914     const FileEntry* E = FMgr->getFile(start,start+Buf.size());
915 
916     // FIXME: Ideally we want a lazy materialization of the ContentCache
917     //  anyway, because we don't want to read in source files unless this
918     //  is absolutely needed.
919     if (!E)
920       D.RegisterPtr(PtrID,NULL);
921     else
922       // Get the ContextCache object and register it with the deserializer.
923       D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E));
924     return;
925   }
926 
927   // Register the ContextCache object with the deserializer.
928   /* FIXME:
929   ContentCache *Entry
930   SMgr.MemBufferInfos.push_back(ContentCache());
931    = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
932   D.RegisterPtr(&Entry);
933 
934   // Create the buffer.
935   unsigned Size = D.ReadInt();
936   Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
937 
938   // Read the contents of the buffer.
939   char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
940   for (unsigned i = 0; i < Size ; ++i)
941     p[i] = D.ReadInt();
942    */
943 }
944 
945 void SourceManager::Emit(llvm::Serializer& S) const {
946   S.EnterBlock();
947   S.EmitPtr(this);
948   S.EmitInt(MainFileID.getOpaqueValue());
949 
950   // Emit: FileInfos.  Just emit the file name.
951   S.EnterBlock();
952 
953   // FIXME: Emit FileInfos.
954   //std::for_each(FileInfos.begin(), FileInfos.end(),
955   //              S.MakeEmitter<ContentCache>());
956 
957   S.ExitBlock();
958 
959   // Emit: MemBufferInfos
960   S.EnterBlock();
961 
962   /* FIXME: EMIT.
963   std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
964                 S.MakeEmitter<ContentCache>());
965    */
966 
967   S.ExitBlock();
968 
969   // FIXME: Emit SLocEntryTable.
970 
971   S.ExitBlock();
972 }
973 
974 SourceManager*
975 SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) {
976   SourceManager *M = new SourceManager();
977   D.RegisterPtr(M);
978 
979   // Read: the FileID of the main source file of the translation unit.
980   M->MainFileID = FileID::get(D.ReadInt());
981 
982   std::vector<char> Buf;
983 
984   /*{ // FIXME Read: FileInfos.
985     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
986     while (!D.FinishedBlock(BLoc))
987     ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
988   }*/
989 
990   /*{ // FIXME Read: MemBufferInfos.
991     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
992     while (!D.FinishedBlock(BLoc))
993     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
994     }*/
995 
996   // FIXME: Read SLocEntryTable.
997 
998   return M;
999 }
1000