xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 595cf9ff81ebef21f73e0f0c479451d97809fe65)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "llvm/Support/Compiler.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/System/Path.h"
19 #include "llvm/Bitcode/Serialize.h"
20 #include "llvm/Bitcode/Deserialize.h"
21 #include "llvm/Support/Streams.h"
22 #include <algorithm>
23 using namespace clang;
24 using namespace SrcMgr;
25 using llvm::MemoryBuffer;
26 
27 ContentCache::~ContentCache() {
28   delete Buffer;
29   delete [] SourceLineCache;
30 }
31 
32 /// getFileInfo - Create or return a cached FileInfo for the specified file.
33 ///
34 const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
35 
36   assert(FileEnt && "Didn't specify a file entry to use?");
37   // Do we already have information about this file?
38   std::set<ContentCache>::iterator I =
39     FileInfos.lower_bound(ContentCache(FileEnt));
40 
41   if (I != FileInfos.end() && I->Entry == FileEnt)
42     return &*I;
43 
44   // Nope, get information.
45   const MemoryBuffer *File =
46     MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize());
47   if (File == 0)
48     return 0;
49 
50   ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
51 
52   Entry.Buffer = File;
53   Entry.SourceLineCache = 0;
54   Entry.NumLines = 0;
55   return &Entry;
56 }
57 
58 
59 /// createMemBufferContentCache - Create a new ContentCache for the specified
60 ///  memory buffer.  This does no caching.
61 const ContentCache*
62 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
63   // Add a new ContentCache to the MemBufferInfos list and return it.  We
64   // must default construct the object first that the instance actually
65   // stored within MemBufferInfos actually owns the Buffer, and not any
66   // temporary we would use in the call to "push_back".
67   MemBufferInfos.push_back(ContentCache());
68   ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
69   Entry.Buffer = Buffer;
70   return &Entry;
71 }
72 
73 
74 /// createFileID - Create a new fileID for the specified ContentCache and
75 /// include position.  This works regardless of whether the ContentCache
76 /// corresponds to a file or some other input source.
77 unsigned SourceManager::createFileID(const ContentCache *File,
78                                      SourceLocation IncludePos) {
79   // If FileEnt is really large (e.g. it's a large .i file), we may not be able
80   // to fit an arbitrary position in the file in the FilePos field.  To handle
81   // this, we create one FileID for each chunk of the file that fits in a
82   // FilePos field.
83   unsigned FileSize = File->Buffer->getBufferSize();
84   if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
85     FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
86     assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
87            "Ran out of file ID's!");
88     return FileIDs.size();
89   }
90 
91   // Create one FileID for each chunk of the file.
92   unsigned Result = FileIDs.size()+1;
93 
94   unsigned ChunkNo = 0;
95   while (1) {
96     FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
97 
98     if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
99     FileSize -= (1 << SourceLocation::FilePosBits);
100   }
101 
102   assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
103          "Ran out of file ID's!");
104   return Result;
105 }
106 
107 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
108 /// that a token from physloc PhysLoc should actually be referenced from
109 /// InstantiationLoc.
110 SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
111                                                   SourceLocation InstantLoc) {
112   // The specified source location may be a mapped location, due to a macro
113   // instantiation or #line directive.  Strip off this information to find out
114   // where the characters are actually located.
115   PhysLoc = getPhysicalLoc(PhysLoc);
116 
117   // Resolve InstantLoc down to a real logical location.
118   InstantLoc = getLogicalLoc(InstantLoc);
119 
120 
121   // If the last macro id is close to the currently requested location, try to
122   // reuse it.  This implements a small cache.
123   for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
124     MacroIDInfo &LastOne = MacroIDs[i];
125 
126     // The instanitation point and source physloc have to exactly match to reuse
127     // (for now).  We could allow "nearby" instantiations in the future.
128     if (LastOne.getVirtualLoc() != InstantLoc ||
129         LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
130       continue;
131 
132     // Check to see if the physloc of the token came from near enough to reuse.
133     int PhysDelta = PhysLoc.getRawFilePos() -
134                     LastOne.getPhysicalLoc().getRawFilePos();
135     if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
136       return SourceLocation::getMacroLoc(i, PhysDelta);
137   }
138 
139 
140   MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
141   return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
142 }
143 
144 /// getBufferData - Return a pointer to the start and end of the character
145 /// data for the specified FileID.
146 std::pair<const char*, const char*>
147 SourceManager::getBufferData(unsigned FileID) const {
148   const llvm::MemoryBuffer *Buf = getBuffer(FileID);
149   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
150 }
151 
152 
153 /// getCharacterData - Return a pointer to the start of the specified location
154 /// in the appropriate MemoryBuffer.
155 const char *SourceManager::getCharacterData(SourceLocation SL) const {
156   // Note that this is a hot function in the getSpelling() path, which is
157   // heavily used by -E mode.
158   SL = getPhysicalLoc(SL);
159 
160   return getContentCache(SL.getFileID())->Buffer->getBufferStart() +
161          getFullFilePos(SL);
162 }
163 
164 
165 /// getColumnNumber - Return the column # for the specified file position.
166 /// this is significantly cheaper to compute than the line number.  This returns
167 /// zero if the column number isn't known.
168 unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
169   unsigned FileID = Loc.getFileID();
170   if (FileID == 0) return 0;
171 
172   unsigned FilePos = getFullFilePos(Loc);
173   const MemoryBuffer *Buffer = getBuffer(FileID);
174   const char *Buf = Buffer->getBufferStart();
175 
176   unsigned LineStart = FilePos;
177   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
178     --LineStart;
179   return FilePos-LineStart+1;
180 }
181 
182 /// getSourceName - This method returns the name of the file or buffer that
183 /// the SourceLocation specifies.  This can be modified with #line directives,
184 /// etc.
185 const char *SourceManager::getSourceName(SourceLocation Loc) const {
186   unsigned FileID = Loc.getFileID();
187   if (FileID == 0) return "";
188   return getContentCache(FileID)->Buffer->getBufferIdentifier();
189 }
190 
191 static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
192 static void ComputeLineNumbers(ContentCache* FI) {
193   const MemoryBuffer *Buffer = FI->Buffer;
194 
195   // Find the file offsets of all of the *physical* source lines.  This does
196   // not look at trigraphs, escaped newlines, or anything else tricky.
197   std::vector<unsigned> LineOffsets;
198 
199   // Line #1 starts at char 0.
200   LineOffsets.push_back(0);
201 
202   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
203   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
204   unsigned Offs = 0;
205   while (1) {
206     // Skip over the contents of the line.
207     // TODO: Vectorize this?  This is very performance sensitive for programs
208     // with lots of diagnostics and in -E mode.
209     const unsigned char *NextBuf = (const unsigned char *)Buf;
210     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
211       ++NextBuf;
212     Offs += NextBuf-Buf;
213     Buf = NextBuf;
214 
215     if (Buf[0] == '\n' || Buf[0] == '\r') {
216       // If this is \n\r or \r\n, skip both characters.
217       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
218         ++Offs, ++Buf;
219       ++Offs, ++Buf;
220       LineOffsets.push_back(Offs);
221     } else {
222       // Otherwise, this is a null.  If end of file, exit.
223       if (Buf == End) break;
224       // Otherwise, skip the null.
225       ++Offs, ++Buf;
226     }
227   }
228 
229   // Copy the offsets into the FileInfo structure.
230   FI->NumLines = LineOffsets.size();
231   FI->SourceLineCache = new unsigned[LineOffsets.size()];
232   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
233 }
234 
235 /// getLineNumber - Given a SourceLocation, return the physical line number
236 /// for the position indicated.  This requires building and caching a table of
237 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
238 /// about to emit a diagnostic.
239 unsigned SourceManager::getLineNumber(SourceLocation Loc) {
240   unsigned FileID = Loc.getFileID();
241   if (FileID == 0) return 0;
242 
243   ContentCache* Content;
244 
245   if (LastLineNoFileIDQuery == FileID)
246     Content = LastLineNoContentCache;
247   else
248     Content = const_cast<ContentCache*>(getContentCache(FileID));
249 
250   // If this is the first use of line information for this buffer, compute the
251   /// SourceLineCache for it on demand.
252   if (Content->SourceLineCache == 0)
253     ComputeLineNumbers(Content);
254 
255   // Okay, we know we have a line number table.  Do a binary search to find the
256   // line number that this character position lands on.
257   unsigned *SourceLineCache = Content->SourceLineCache;
258   unsigned *SourceLineCacheStart = SourceLineCache;
259   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
260 
261   unsigned QueriedFilePos = getFullFilePos(Loc)+1;
262 
263   // If the previous query was to the same file, we know both the file pos from
264   // that query and the line number returned.  This allows us to narrow the
265   // search space from the entire file to something near the match.
266   if (LastLineNoFileIDQuery == FileID) {
267     if (QueriedFilePos >= LastLineNoFilePos) {
268       SourceLineCache = SourceLineCache+LastLineNoResult-1;
269 
270       // The query is likely to be nearby the previous one.  Here we check to
271       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
272       // where big comment blocks and vertical whitespace eat up lines but
273       // contribute no tokens.
274       if (SourceLineCache+5 < SourceLineCacheEnd) {
275         if (SourceLineCache[5] > QueriedFilePos)
276           SourceLineCacheEnd = SourceLineCache+5;
277         else if (SourceLineCache+10 < SourceLineCacheEnd) {
278           if (SourceLineCache[10] > QueriedFilePos)
279             SourceLineCacheEnd = SourceLineCache+10;
280           else if (SourceLineCache+20 < SourceLineCacheEnd) {
281             if (SourceLineCache[20] > QueriedFilePos)
282               SourceLineCacheEnd = SourceLineCache+20;
283           }
284         }
285       }
286     } else {
287       SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
288     }
289   }
290 
291   // If the spread is large, do a "radix" test as our initial guess, based on
292   // the assumption that lines average to approximately the same length.
293   // NOTE: This is currently disabled, as it does not appear to be profitable in
294   // initial measurements.
295   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
296     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
297 
298     // Take a stab at guessing where it is.
299     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
300 
301     // Check for -10 and +10 lines.
302     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
303     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
304 
305     // If the computed lower bound is less than the query location, move it in.
306     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
307         SourceLineCacheStart[LowerBound] < QueriedFilePos)
308       SourceLineCache = SourceLineCacheStart+LowerBound;
309 
310     // If the computed upper bound is greater than the query location, move it.
311     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
312         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
313       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
314   }
315 
316   unsigned *Pos
317     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
318   unsigned LineNo = Pos-SourceLineCacheStart;
319 
320   LastLineNoFileIDQuery = FileID;
321   LastLineNoContentCache = Content;
322   LastLineNoFilePos = QueriedFilePos;
323   LastLineNoResult = LineNo;
324   return LineNo;
325 }
326 
327 /// PrintStats - Print statistics to stderr.
328 ///
329 void SourceManager::PrintStats() const {
330   llvm::cerr << "\n*** Source Manager Stats:\n";
331   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
332              << " mem buffers mapped, " << FileIDs.size()
333              << " file ID's allocated.\n";
334   llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
335              << MacroIDs.size() << " macro expansion FileID's.\n";
336 
337   unsigned NumLineNumsComputed = 0;
338   unsigned NumFileBytesMapped = 0;
339   for (std::set<ContentCache>::const_iterator I =
340        FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
341     NumLineNumsComputed += I->SourceLineCache != 0;
342     NumFileBytesMapped  += I->Buffer->getBufferSize();
343   }
344 
345   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
346              << NumLineNumsComputed << " files with line #'s computed.\n";
347 }
348 
349 //===----------------------------------------------------------------------===//
350 // Serialization.
351 //===----------------------------------------------------------------------===//
352 
353 void ContentCache::Emit(llvm::Serializer& S) const {
354   S.FlushRecord();
355   S.EmitPtr(this);
356 
357   if (Entry) {
358     llvm::sys::Path Fname(Buffer->getBufferIdentifier());
359 
360     if (Fname.isAbsolute())
361       S.EmitCStr(Fname.c_str());
362     else {
363       // Create an absolute path.
364       // FIXME: This will potentially contain ".." and "." in the path.
365       llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
366       path.appendComponent(Fname.c_str());
367       S.EmitCStr(path.c_str());
368     }
369   }
370   else {
371     const char* p = Buffer->getBufferStart();
372     const char* e = Buffer->getBufferEnd();
373 
374     S.EmitInt(e-p);
375 
376     for ( ; p != e; ++p)
377       S.EmitInt(*p);
378   }
379 
380   S.FlushRecord();
381 }
382 
383 void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
384                                        SourceManager& SMgr,
385                                        FileManager* FMgr,
386                                        std::vector<char>& Buf) {
387   if (FMgr) {
388     llvm::SerializedPtrID PtrID = D.ReadPtrID();
389     D.ReadCStr(Buf,false);
390 
391     // Create/fetch the FileEntry.
392     const char* start = &Buf[0];
393     const FileEntry* E = FMgr->getFile(start,start+Buf.size());
394 
395     // FIXME: Ideally we want a lazy materialization of the ContentCache
396     //  anyway, because we don't want to read in source files unless this
397     //  is absolutely needed.
398     if (!E)
399       D.RegisterPtr(PtrID,NULL);
400     else
401       // Get the ContextCache object and register it with the deserializer.
402       D.RegisterPtr(PtrID,SMgr.getContentCache(E));
403   }
404   else {
405     // Register the ContextCache object with the deserializer.
406     SMgr.MemBufferInfos.push_back(ContentCache());
407     ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
408     D.RegisterPtr(&Entry);
409 
410     // Create the buffer.
411     unsigned Size = D.ReadInt();
412     Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
413 
414     // Read the contents of the buffer.
415     char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
416     for (unsigned i = 0; i < Size ; ++i)
417       p[i] = D.ReadInt();
418   }
419 }
420 
421 void FileIDInfo::Emit(llvm::Serializer& S) const {
422   S.Emit(IncludeLoc);
423   S.EmitInt(ChunkNo);
424   S.EmitPtr(Content);
425 }
426 
427 FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
428   FileIDInfo I;
429   I.IncludeLoc = SourceLocation::ReadVal(D);
430   I.ChunkNo = D.ReadInt();
431   D.ReadPtr(I.Content,false);
432   return I;
433 }
434 
435 void MacroIDInfo::Emit(llvm::Serializer& S) const {
436   S.Emit(VirtualLoc);
437   S.Emit(PhysicalLoc);
438 }
439 
440 MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
441   MacroIDInfo I;
442   I.VirtualLoc = SourceLocation::ReadVal(D);
443   I.PhysicalLoc = SourceLocation::ReadVal(D);
444   return I;
445 }
446 
447 void SourceManager::Emit(llvm::Serializer& S) const {
448   S.EnterBlock();
449   S.EmitPtr(this);
450   S.EmitInt(MainFileID);
451 
452   // Emit: FileInfos.  Just emit the file name.
453   S.EnterBlock();
454 
455   std::for_each(FileInfos.begin(),FileInfos.end(),
456                 S.MakeEmitter<ContentCache>());
457 
458   S.ExitBlock();
459 
460   // Emit: MemBufferInfos
461   S.EnterBlock();
462 
463   std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
464                 S.MakeEmitter<ContentCache>());
465 
466   S.ExitBlock();
467 
468   // Emit: FileIDs
469   S.EmitInt(FileIDs.size());
470   std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
471 
472   // Emit: MacroIDs
473   S.EmitInt(MacroIDs.size());
474   std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
475 
476   S.ExitBlock();
477 }
478 
479 SourceManager*
480 SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
481   SourceManager *M = new SourceManager();
482   D.RegisterPtr(M);
483 
484   // Read: the FileID of the main source file of the translation unit.
485   M->MainFileID = D.ReadInt();
486 
487   std::vector<char> Buf;
488 
489   { // Read: FileInfos.
490     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
491     while (!D.FinishedBlock(BLoc))
492     ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
493   }
494 
495   { // Read: MemBufferInfos.
496     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
497     while (!D.FinishedBlock(BLoc))
498     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
499   }
500 
501   // Read: FileIDs.
502   unsigned Size = D.ReadInt();
503   M->FileIDs.reserve(Size);
504   for (; Size > 0 ; --Size)
505     M->FileIDs.push_back(FileIDInfo::ReadVal(D));
506 
507   // Read: MacroIDs.
508   Size = D.ReadInt();
509   M->MacroIDs.reserve(Size);
510   for (; Size > 0 ; --Size)
511     M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
512 
513   return M;
514 }
515