xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 3c91971b3391a6aa7f48ee11986813204006adf9)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "llvm/Support/Compiler.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/System/Path.h"
19 #include "llvm/Bitcode/Serialize.h"
20 #include "llvm/Bitcode/Deserialize.h"
21 #include "llvm/Support/Streams.h"
22 #include <algorithm>
23 using namespace clang;
24 using namespace SrcMgr;
25 using llvm::MemoryBuffer;
26 
27 // This (temporary) directive toggles between lazy and eager creation of
28 // MemBuffers.  This directive is not permanent, and is here to test a few
29 // potential optimizations in PTH.  Once it is clear whether eager or lazy
30 // creation of MemBuffers is better this directive will get removed.
31 #define LAZY
32 
33 ContentCache::~ContentCache() {
34   delete Buffer;
35   delete [] SourceLineCache;
36 }
37 
38 /// getSizeBytesMapped - Returns the number of bytes actually mapped for
39 ///  this ContentCache.  This can be 0 if the MemBuffer was not actually
40 ///  instantiated.
41 unsigned ContentCache::getSizeBytesMapped() const {
42   return Buffer ? Buffer->getBufferSize() : 0;
43 }
44 
45 /// getSize - Returns the size of the content encapsulated by this ContentCache.
46 ///  This can be the size of the source file or the size of an arbitrary
47 ///  scratch buffer.  If the ContentCache encapsulates a source file, that
48 ///  file is not lazily brought in from disk to satisfy this query.
49 unsigned ContentCache::getSize() const {
50   return Entry ? Entry->getSize() : Buffer->getBufferSize();
51 }
52 
53 const llvm::MemoryBuffer* ContentCache::getBuffer() const {
54 #ifdef LAZY
55   // Lazily create the Buffer for ContentCaches that wrap files.
56   if (!Buffer && Entry) {
57     // FIXME: Should we support a way to not have to do this check over
58     //   and over if we cannot open the file?
59     // FIXME: This const_cast is ugly.  Should we make getBuffer() non-const?
60     const_cast<ContentCache*>(this)->Buffer =
61       MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize());
62   }
63 #endif
64   return Buffer;
65 }
66 
67 
68 /// getFileInfo - Create or return a cached FileInfo for the specified file.
69 ///
70 const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
71 
72   assert(FileEnt && "Didn't specify a file entry to use?");
73   // Do we already have information about this file?
74   std::set<ContentCache>::iterator I =
75     FileInfos.lower_bound(ContentCache(FileEnt));
76 
77   if (I != FileInfos.end() && I->Entry == FileEnt)
78     return &*I;
79 
80   // Nope, get information.
81 #ifndef LAZY
82   const MemoryBuffer *File =
83     MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize());
84   if (File == 0)
85     return 0;
86 #endif
87 
88   ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
89 #ifndef LAZY
90   Entry.setBuffer(File);
91 #endif
92   Entry.SourceLineCache = 0;
93   Entry.NumLines = 0;
94   return &Entry;
95 }
96 
97 
98 /// createMemBufferContentCache - Create a new ContentCache for the specified
99 ///  memory buffer.  This does no caching.
100 const ContentCache*
101 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
102   // Add a new ContentCache to the MemBufferInfos list and return it.  We
103   // must default construct the object first that the instance actually
104   // stored within MemBufferInfos actually owns the Buffer, and not any
105   // temporary we would use in the call to "push_back".
106   MemBufferInfos.push_back(ContentCache());
107   ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
108   Entry.setBuffer(Buffer);
109   return &Entry;
110 }
111 
112 
113 /// createFileID - Create a new fileID for the specified ContentCache and
114 /// include position.  This works regardless of whether the ContentCache
115 /// corresponds to a file or some other input source.
116 unsigned SourceManager::createFileID(const ContentCache *File,
117                                      SourceLocation IncludePos,
118                                      SrcMgr::CharacteristicKind FileCharacter) {
119   // If FileEnt is really large (e.g. it's a large .i file), we may not be able
120   // to fit an arbitrary position in the file in the FilePos field.  To handle
121   // this, we create one FileID for each chunk of the file that fits in a
122   // FilePos field.
123   unsigned FileSize = File->getSize();
124   if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
125     FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter));
126     assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
127            "Ran out of file ID's!");
128     return FileIDs.size();
129   }
130 
131   // Create one FileID for each chunk of the file.
132   unsigned Result = FileIDs.size()+1;
133 
134   unsigned ChunkNo = 0;
135   while (1) {
136     FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File,
137                                       FileCharacter));
138 
139     if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
140     FileSize -= (1 << SourceLocation::FilePosBits);
141   }
142 
143   assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
144          "Ran out of file ID's!");
145   return Result;
146 }
147 
148 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
149 /// that a token from SpellingLoc should actually be referenced from
150 /// InstantiationLoc.
151 SourceLocation SourceManager::getInstantiationLoc(SourceLocation SpellingLoc,
152                                                   SourceLocation InstantLoc) {
153   // The specified source location may be a mapped location, due to a macro
154   // instantiation or #line directive.  Strip off this information to find out
155   // where the characters are actually located.
156   SpellingLoc = getSpellingLoc(SpellingLoc);
157 
158   // Resolve InstantLoc down to a real logical location.
159   InstantLoc = getLogicalLoc(InstantLoc);
160 
161 
162   // If the last macro id is close to the currently requested location, try to
163   // reuse it.  This implements a small cache.
164   for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
165     MacroIDInfo &LastOne = MacroIDs[i];
166 
167     // The instanitation point and source SpellingLoc have to exactly match to
168     // reuse (for now).  We could allow "nearby" instantiations in the future.
169     if (LastOne.getInstantiationLoc() != InstantLoc ||
170         LastOne.getSpellingLoc().getFileID() != SpellingLoc.getFileID())
171       continue;
172 
173     // Check to see if the spellloc of the token came from near enough to reuse.
174     int SpellDelta = SpellingLoc.getRawFilePos() -
175                      LastOne.getSpellingLoc().getRawFilePos();
176     if (SourceLocation::isValidMacroSpellingOffs(SpellDelta))
177       return SourceLocation::getMacroLoc(i, SpellDelta);
178   }
179 
180 
181   MacroIDs.push_back(MacroIDInfo::get(InstantLoc, SpellingLoc));
182   return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
183 }
184 
185 /// getBufferData - Return a pointer to the start and end of the character
186 /// data for the specified FileID.
187 std::pair<const char*, const char*>
188 SourceManager::getBufferData(unsigned FileID) const {
189   const llvm::MemoryBuffer *Buf = getBuffer(FileID);
190   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
191 }
192 
193 
194 /// getCharacterData - Return a pointer to the start of the specified location
195 /// in the appropriate MemoryBuffer.
196 const char *SourceManager::getCharacterData(SourceLocation SL) const {
197   // Note that this is a hot function in the getSpelling() path, which is
198   // heavily used by -E mode.
199   SL = getSpellingLoc(SL);
200 
201   // Note that calling 'getBuffer()' may lazily page in a source file.
202   return getContentCache(SL.getFileID())->getBuffer()->getBufferStart() +
203          getFullFilePos(SL);
204 }
205 
206 
207 /// getColumnNumber - Return the column # for the specified file position.
208 /// this is significantly cheaper to compute than the line number.  This returns
209 /// zero if the column number isn't known.
210 unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
211   unsigned FileID = Loc.getFileID();
212   if (FileID == 0) return 0;
213 
214   unsigned FilePos = getFullFilePos(Loc);
215   const MemoryBuffer *Buffer = getBuffer(FileID);
216   const char *Buf = Buffer->getBufferStart();
217 
218   unsigned LineStart = FilePos;
219   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
220     --LineStart;
221   return FilePos-LineStart+1;
222 }
223 
224 /// getSourceName - This method returns the name of the file or buffer that
225 /// the SourceLocation specifies.  This can be modified with #line directives,
226 /// etc.
227 const char *SourceManager::getSourceName(SourceLocation Loc) const {
228   unsigned FileID = Loc.getFileID();
229   if (FileID == 0) return "";
230 
231   // To get the source name, first consult the FileEntry (if one exists) before
232   // the MemBuffer as this will avoid unnecessarily paging in the MemBuffer.
233   const SrcMgr::ContentCache* C = getContentCache(FileID);
234   return C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier();
235 }
236 
237 static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
238 static void ComputeLineNumbers(ContentCache* FI) {
239   // Note that calling 'getBuffer()' may lazily page in the file.
240   const MemoryBuffer *Buffer = FI->getBuffer();
241 
242   // Find the file offsets of all of the *physical* source lines.  This does
243   // not look at trigraphs, escaped newlines, or anything else tricky.
244   std::vector<unsigned> LineOffsets;
245 
246   // Line #1 starts at char 0.
247   LineOffsets.push_back(0);
248 
249   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
250   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
251   unsigned Offs = 0;
252   while (1) {
253     // Skip over the contents of the line.
254     // TODO: Vectorize this?  This is very performance sensitive for programs
255     // with lots of diagnostics and in -E mode.
256     const unsigned char *NextBuf = (const unsigned char *)Buf;
257     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
258       ++NextBuf;
259     Offs += NextBuf-Buf;
260     Buf = NextBuf;
261 
262     if (Buf[0] == '\n' || Buf[0] == '\r') {
263       // If this is \n\r or \r\n, skip both characters.
264       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
265         ++Offs, ++Buf;
266       ++Offs, ++Buf;
267       LineOffsets.push_back(Offs);
268     } else {
269       // Otherwise, this is a null.  If end of file, exit.
270       if (Buf == End) break;
271       // Otherwise, skip the null.
272       ++Offs, ++Buf;
273     }
274   }
275 
276   // Copy the offsets into the FileInfo structure.
277   FI->NumLines = LineOffsets.size();
278   FI->SourceLineCache = new unsigned[LineOffsets.size()];
279   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
280 }
281 
282 /// getLineNumber - Given a SourceLocation, return the spelling line number
283 /// for the position indicated.  This requires building and caching a table of
284 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
285 /// about to emit a diagnostic.
286 unsigned SourceManager::getLineNumber(SourceLocation Loc) const {
287   unsigned FileID = Loc.getFileID();
288   if (FileID == 0) return 0;
289 
290   ContentCache* Content;
291 
292   if (LastLineNoFileIDQuery == FileID)
293     Content = LastLineNoContentCache;
294   else
295     Content = const_cast<ContentCache*>(getContentCache(FileID));
296 
297   // If this is the first use of line information for this buffer, compute the
298   /// SourceLineCache for it on demand.
299   if (Content->SourceLineCache == 0)
300     ComputeLineNumbers(Content);
301 
302   // Okay, we know we have a line number table.  Do a binary search to find the
303   // line number that this character position lands on.
304   unsigned *SourceLineCache = Content->SourceLineCache;
305   unsigned *SourceLineCacheStart = SourceLineCache;
306   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
307 
308   unsigned QueriedFilePos = getFullFilePos(Loc)+1;
309 
310   // If the previous query was to the same file, we know both the file pos from
311   // that query and the line number returned.  This allows us to narrow the
312   // search space from the entire file to something near the match.
313   if (LastLineNoFileIDQuery == FileID) {
314     if (QueriedFilePos >= LastLineNoFilePos) {
315       SourceLineCache = SourceLineCache+LastLineNoResult-1;
316 
317       // The query is likely to be nearby the previous one.  Here we check to
318       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
319       // where big comment blocks and vertical whitespace eat up lines but
320       // contribute no tokens.
321       if (SourceLineCache+5 < SourceLineCacheEnd) {
322         if (SourceLineCache[5] > QueriedFilePos)
323           SourceLineCacheEnd = SourceLineCache+5;
324         else if (SourceLineCache+10 < SourceLineCacheEnd) {
325           if (SourceLineCache[10] > QueriedFilePos)
326             SourceLineCacheEnd = SourceLineCache+10;
327           else if (SourceLineCache+20 < SourceLineCacheEnd) {
328             if (SourceLineCache[20] > QueriedFilePos)
329               SourceLineCacheEnd = SourceLineCache+20;
330           }
331         }
332       }
333     } else {
334       SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
335     }
336   }
337 
338   // If the spread is large, do a "radix" test as our initial guess, based on
339   // the assumption that lines average to approximately the same length.
340   // NOTE: This is currently disabled, as it does not appear to be profitable in
341   // initial measurements.
342   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
343     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
344 
345     // Take a stab at guessing where it is.
346     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
347 
348     // Check for -10 and +10 lines.
349     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
350     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
351 
352     // If the computed lower bound is less than the query location, move it in.
353     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
354         SourceLineCacheStart[LowerBound] < QueriedFilePos)
355       SourceLineCache = SourceLineCacheStart+LowerBound;
356 
357     // If the computed upper bound is greater than the query location, move it.
358     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
359         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
360       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
361   }
362 
363   unsigned *Pos
364     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
365   unsigned LineNo = Pos-SourceLineCacheStart;
366 
367   LastLineNoFileIDQuery = FileID;
368   LastLineNoContentCache = Content;
369   LastLineNoFilePos = QueriedFilePos;
370   LastLineNoResult = LineNo;
371   return LineNo;
372 }
373 
374 /// PrintStats - Print statistics to stderr.
375 ///
376 void SourceManager::PrintStats() const {
377   llvm::cerr << "\n*** Source Manager Stats:\n";
378   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
379              << " mem buffers mapped, " << FileIDs.size()
380              << " file ID's allocated.\n";
381   llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
382              << MacroIDs.size() << " macro expansion FileID's.\n";
383 
384   unsigned NumLineNumsComputed = 0;
385   unsigned NumFileBytesMapped = 0;
386   for (std::set<ContentCache>::const_iterator I =
387        FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
388     NumLineNumsComputed += I->SourceLineCache != 0;
389     NumFileBytesMapped  += I->getSizeBytesMapped();
390   }
391 
392   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
393              << NumLineNumsComputed << " files with line #'s computed.\n";
394 }
395 
396 //===----------------------------------------------------------------------===//
397 // Serialization.
398 //===----------------------------------------------------------------------===//
399 
400 void ContentCache::Emit(llvm::Serializer& S) const {
401   S.FlushRecord();
402   S.EmitPtr(this);
403 
404   if (Entry) {
405     llvm::sys::Path Fname(Buffer->getBufferIdentifier());
406 
407     if (Fname.isAbsolute())
408       S.EmitCStr(Fname.c_str());
409     else {
410       // Create an absolute path.
411       // FIXME: This will potentially contain ".." and "." in the path.
412       llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
413       path.appendComponent(Fname.c_str());
414       S.EmitCStr(path.c_str());
415     }
416   }
417   else {
418     const char* p = Buffer->getBufferStart();
419     const char* e = Buffer->getBufferEnd();
420 
421     S.EmitInt(e-p);
422 
423     for ( ; p != e; ++p)
424       S.EmitInt(*p);
425   }
426 
427   S.FlushRecord();
428 }
429 
430 void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
431                                        SourceManager& SMgr,
432                                        FileManager* FMgr,
433                                        std::vector<char>& Buf) {
434   if (FMgr) {
435     llvm::SerializedPtrID PtrID = D.ReadPtrID();
436     D.ReadCStr(Buf,false);
437 
438     // Create/fetch the FileEntry.
439     const char* start = &Buf[0];
440     const FileEntry* E = FMgr->getFile(start,start+Buf.size());
441 
442     // FIXME: Ideally we want a lazy materialization of the ContentCache
443     //  anyway, because we don't want to read in source files unless this
444     //  is absolutely needed.
445     if (!E)
446       D.RegisterPtr(PtrID,NULL);
447     else
448       // Get the ContextCache object and register it with the deserializer.
449       D.RegisterPtr(PtrID,SMgr.getContentCache(E));
450   }
451   else {
452     // Register the ContextCache object with the deserializer.
453     SMgr.MemBufferInfos.push_back(ContentCache());
454     ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
455     D.RegisterPtr(&Entry);
456 
457     // Create the buffer.
458     unsigned Size = D.ReadInt();
459     Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
460 
461     // Read the contents of the buffer.
462     char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
463     for (unsigned i = 0; i < Size ; ++i)
464       p[i] = D.ReadInt();
465   }
466 }
467 
468 void FileIDInfo::Emit(llvm::Serializer& S) const {
469   S.Emit(IncludeLoc);
470   S.EmitInt(ChunkNo);
471   S.EmitPtr(Content);
472 }
473 
474 FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
475   FileIDInfo I;
476   I.IncludeLoc = SourceLocation::ReadVal(D);
477   I.ChunkNo = D.ReadInt();
478   D.ReadPtr(I.Content,false);
479   return I;
480 }
481 
482 void MacroIDInfo::Emit(llvm::Serializer& S) const {
483   S.Emit(InstantiationLoc);
484   S.Emit(SpellingLoc);
485 }
486 
487 MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
488   MacroIDInfo I;
489   I.InstantiationLoc = SourceLocation::ReadVal(D);
490   I.SpellingLoc = SourceLocation::ReadVal(D);
491   return I;
492 }
493 
494 void SourceManager::Emit(llvm::Serializer& S) const {
495   S.EnterBlock();
496   S.EmitPtr(this);
497   S.EmitInt(MainFileID);
498 
499   // Emit: FileInfos.  Just emit the file name.
500   S.EnterBlock();
501 
502   std::for_each(FileInfos.begin(),FileInfos.end(),
503                 S.MakeEmitter<ContentCache>());
504 
505   S.ExitBlock();
506 
507   // Emit: MemBufferInfos
508   S.EnterBlock();
509 
510   std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
511                 S.MakeEmitter<ContentCache>());
512 
513   S.ExitBlock();
514 
515   // Emit: FileIDs
516   S.EmitInt(FileIDs.size());
517   std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
518 
519   // Emit: MacroIDs
520   S.EmitInt(MacroIDs.size());
521   std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
522 
523   S.ExitBlock();
524 }
525 
526 SourceManager*
527 SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
528   SourceManager *M = new SourceManager();
529   D.RegisterPtr(M);
530 
531   // Read: the FileID of the main source file of the translation unit.
532   M->MainFileID = D.ReadInt();
533 
534   std::vector<char> Buf;
535 
536   { // Read: FileInfos.
537     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
538     while (!D.FinishedBlock(BLoc))
539     ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
540   }
541 
542   { // Read: MemBufferInfos.
543     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
544     while (!D.FinishedBlock(BLoc))
545     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
546   }
547 
548   // Read: FileIDs.
549   unsigned Size = D.ReadInt();
550   M->FileIDs.reserve(Size);
551   for (; Size > 0 ; --Size)
552     M->FileIDs.push_back(FileIDInfo::ReadVal(D));
553 
554   // Read: MacroIDs.
555   Size = D.ReadInt();
556   M->MacroIDs.reserve(Size);
557   for (; Size > 0 ; --Size)
558     M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
559 
560   return M;
561 }
562