xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 378c5539c8e34c4ab21b0d3cb7cae736a9612fdb)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "llvm/Support/Compiler.h"
17 #include "llvm/Support/MemoryBuffer.h"
18 #include "llvm/System/Path.h"
19 #include "llvm/Bitcode/Serialize.h"
20 #include "llvm/Bitcode/Deserialize.h"
21 #include "llvm/Support/Streams.h"
22 #include <algorithm>
23 using namespace clang;
24 using namespace SrcMgr;
25 using llvm::MemoryBuffer;
26 
27 ContentCache::~ContentCache() {
28   delete Buffer;
29   delete [] SourceLineCache;
30 }
31 
32 /// getFileInfo - Create or return a cached FileInfo for the specified file.
33 ///
34 const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
35 
36   assert(FileEnt && "Didn't specify a file entry to use?");
37   // Do we already have information about this file?
38   std::set<ContentCache>::iterator I =
39     FileInfos.lower_bound(ContentCache(FileEnt));
40 
41   if (I != FileInfos.end() && I->Entry == FileEnt)
42     return &*I;
43 
44   // Nope, get information.
45   const MemoryBuffer *File =
46     MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize());
47   if (File == 0)
48     return 0;
49 
50   ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
51 
52   Entry.Buffer = File;
53   Entry.SourceLineCache = 0;
54   Entry.NumLines = 0;
55   return &Entry;
56 }
57 
58 
59 /// createMemBufferContentCache - Create a new ContentCache for the specified
60 ///  memory buffer.  This does no caching.
61 const ContentCache*
62 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
63   // Add a new ContentCache to the MemBufferInfos list and return it.  We
64   // must default construct the object first that the instance actually
65   // stored within MemBufferInfos actually owns the Buffer, and not any
66   // temporary we would use in the call to "push_back".
67   MemBufferInfos.push_back(ContentCache());
68   ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
69   Entry.Buffer = Buffer;
70   return &Entry;
71 }
72 
73 
74 /// createFileID - Create a new fileID for the specified ContentCache and
75 /// include position.  This works regardless of whether the ContentCache
76 /// corresponds to a file or some other input source.
77 unsigned SourceManager::createFileID(const ContentCache *File,
78                                      SourceLocation IncludePos,
79                                      SrcMgr::Characteristic_t FileCharacter) {
80   // If FileEnt is really large (e.g. it's a large .i file), we may not be able
81   // to fit an arbitrary position in the file in the FilePos field.  To handle
82   // this, we create one FileID for each chunk of the file that fits in a
83   // FilePos field.
84   unsigned FileSize = File->Buffer->getBufferSize();
85   if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
86     FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File, FileCharacter));
87     assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
88            "Ran out of file ID's!");
89     return FileIDs.size();
90   }
91 
92   // Create one FileID for each chunk of the file.
93   unsigned Result = FileIDs.size()+1;
94 
95   unsigned ChunkNo = 0;
96   while (1) {
97     FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File,
98                                       FileCharacter));
99 
100     if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
101     FileSize -= (1 << SourceLocation::FilePosBits);
102   }
103 
104   assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
105          "Ran out of file ID's!");
106   return Result;
107 }
108 
109 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
110 /// that a token from physloc PhysLoc should actually be referenced from
111 /// InstantiationLoc.
112 SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
113                                                   SourceLocation InstantLoc) {
114   // The specified source location may be a mapped location, due to a macro
115   // instantiation or #line directive.  Strip off this information to find out
116   // where the characters are actually located.
117   PhysLoc = getPhysicalLoc(PhysLoc);
118 
119   // Resolve InstantLoc down to a real logical location.
120   InstantLoc = getLogicalLoc(InstantLoc);
121 
122 
123   // If the last macro id is close to the currently requested location, try to
124   // reuse it.  This implements a small cache.
125   for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
126     MacroIDInfo &LastOne = MacroIDs[i];
127 
128     // The instanitation point and source physloc have to exactly match to reuse
129     // (for now).  We could allow "nearby" instantiations in the future.
130     if (LastOne.getVirtualLoc() != InstantLoc ||
131         LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
132       continue;
133 
134     // Check to see if the physloc of the token came from near enough to reuse.
135     int PhysDelta = PhysLoc.getRawFilePos() -
136                     LastOne.getPhysicalLoc().getRawFilePos();
137     if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
138       return SourceLocation::getMacroLoc(i, PhysDelta);
139   }
140 
141 
142   MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
143   return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
144 }
145 
146 /// getBufferData - Return a pointer to the start and end of the character
147 /// data for the specified FileID.
148 std::pair<const char*, const char*>
149 SourceManager::getBufferData(unsigned FileID) const {
150   const llvm::MemoryBuffer *Buf = getBuffer(FileID);
151   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
152 }
153 
154 
155 /// getCharacterData - Return a pointer to the start of the specified location
156 /// in the appropriate MemoryBuffer.
157 const char *SourceManager::getCharacterData(SourceLocation SL) const {
158   // Note that this is a hot function in the getSpelling() path, which is
159   // heavily used by -E mode.
160   SL = getPhysicalLoc(SL);
161 
162   return getContentCache(SL.getFileID())->Buffer->getBufferStart() +
163          getFullFilePos(SL);
164 }
165 
166 
167 /// getColumnNumber - Return the column # for the specified file position.
168 /// this is significantly cheaper to compute than the line number.  This returns
169 /// zero if the column number isn't known.
170 unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
171   unsigned FileID = Loc.getFileID();
172   if (FileID == 0) return 0;
173 
174   unsigned FilePos = getFullFilePos(Loc);
175   const MemoryBuffer *Buffer = getBuffer(FileID);
176   const char *Buf = Buffer->getBufferStart();
177 
178   unsigned LineStart = FilePos;
179   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
180     --LineStart;
181   return FilePos-LineStart+1;
182 }
183 
184 /// getSourceName - This method returns the name of the file or buffer that
185 /// the SourceLocation specifies.  This can be modified with #line directives,
186 /// etc.
187 const char *SourceManager::getSourceName(SourceLocation Loc) const {
188   unsigned FileID = Loc.getFileID();
189   if (FileID == 0) return "";
190   return getContentCache(FileID)->Buffer->getBufferIdentifier();
191 }
192 
193 static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
194 static void ComputeLineNumbers(ContentCache* FI) {
195   const MemoryBuffer *Buffer = FI->Buffer;
196 
197   // Find the file offsets of all of the *physical* source lines.  This does
198   // not look at trigraphs, escaped newlines, or anything else tricky.
199   std::vector<unsigned> LineOffsets;
200 
201   // Line #1 starts at char 0.
202   LineOffsets.push_back(0);
203 
204   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
205   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
206   unsigned Offs = 0;
207   while (1) {
208     // Skip over the contents of the line.
209     // TODO: Vectorize this?  This is very performance sensitive for programs
210     // with lots of diagnostics and in -E mode.
211     const unsigned char *NextBuf = (const unsigned char *)Buf;
212     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
213       ++NextBuf;
214     Offs += NextBuf-Buf;
215     Buf = NextBuf;
216 
217     if (Buf[0] == '\n' || Buf[0] == '\r') {
218       // If this is \n\r or \r\n, skip both characters.
219       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
220         ++Offs, ++Buf;
221       ++Offs, ++Buf;
222       LineOffsets.push_back(Offs);
223     } else {
224       // Otherwise, this is a null.  If end of file, exit.
225       if (Buf == End) break;
226       // Otherwise, skip the null.
227       ++Offs, ++Buf;
228     }
229   }
230 
231   // Copy the offsets into the FileInfo structure.
232   FI->NumLines = LineOffsets.size();
233   FI->SourceLineCache = new unsigned[LineOffsets.size()];
234   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
235 }
236 
237 /// getLineNumber - Given a SourceLocation, return the physical line number
238 /// for the position indicated.  This requires building and caching a table of
239 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
240 /// about to emit a diagnostic.
241 unsigned SourceManager::getLineNumber(SourceLocation Loc) {
242   unsigned FileID = Loc.getFileID();
243   if (FileID == 0) return 0;
244 
245   ContentCache* Content;
246 
247   if (LastLineNoFileIDQuery == FileID)
248     Content = LastLineNoContentCache;
249   else
250     Content = const_cast<ContentCache*>(getContentCache(FileID));
251 
252   // If this is the first use of line information for this buffer, compute the
253   /// SourceLineCache for it on demand.
254   if (Content->SourceLineCache == 0)
255     ComputeLineNumbers(Content);
256 
257   // Okay, we know we have a line number table.  Do a binary search to find the
258   // line number that this character position lands on.
259   unsigned *SourceLineCache = Content->SourceLineCache;
260   unsigned *SourceLineCacheStart = SourceLineCache;
261   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
262 
263   unsigned QueriedFilePos = getFullFilePos(Loc)+1;
264 
265   // If the previous query was to the same file, we know both the file pos from
266   // that query and the line number returned.  This allows us to narrow the
267   // search space from the entire file to something near the match.
268   if (LastLineNoFileIDQuery == FileID) {
269     if (QueriedFilePos >= LastLineNoFilePos) {
270       SourceLineCache = SourceLineCache+LastLineNoResult-1;
271 
272       // The query is likely to be nearby the previous one.  Here we check to
273       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
274       // where big comment blocks and vertical whitespace eat up lines but
275       // contribute no tokens.
276       if (SourceLineCache+5 < SourceLineCacheEnd) {
277         if (SourceLineCache[5] > QueriedFilePos)
278           SourceLineCacheEnd = SourceLineCache+5;
279         else if (SourceLineCache+10 < SourceLineCacheEnd) {
280           if (SourceLineCache[10] > QueriedFilePos)
281             SourceLineCacheEnd = SourceLineCache+10;
282           else if (SourceLineCache+20 < SourceLineCacheEnd) {
283             if (SourceLineCache[20] > QueriedFilePos)
284               SourceLineCacheEnd = SourceLineCache+20;
285           }
286         }
287       }
288     } else {
289       SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
290     }
291   }
292 
293   // If the spread is large, do a "radix" test as our initial guess, based on
294   // the assumption that lines average to approximately the same length.
295   // NOTE: This is currently disabled, as it does not appear to be profitable in
296   // initial measurements.
297   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
298     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
299 
300     // Take a stab at guessing where it is.
301     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
302 
303     // Check for -10 and +10 lines.
304     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
305     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
306 
307     // If the computed lower bound is less than the query location, move it in.
308     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
309         SourceLineCacheStart[LowerBound] < QueriedFilePos)
310       SourceLineCache = SourceLineCacheStart+LowerBound;
311 
312     // If the computed upper bound is greater than the query location, move it.
313     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
314         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
315       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
316   }
317 
318   unsigned *Pos
319     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
320   unsigned LineNo = Pos-SourceLineCacheStart;
321 
322   LastLineNoFileIDQuery = FileID;
323   LastLineNoContentCache = Content;
324   LastLineNoFilePos = QueriedFilePos;
325   LastLineNoResult = LineNo;
326   return LineNo;
327 }
328 
329 /// PrintStats - Print statistics to stderr.
330 ///
331 void SourceManager::PrintStats() const {
332   llvm::cerr << "\n*** Source Manager Stats:\n";
333   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
334              << " mem buffers mapped, " << FileIDs.size()
335              << " file ID's allocated.\n";
336   llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
337              << MacroIDs.size() << " macro expansion FileID's.\n";
338 
339   unsigned NumLineNumsComputed = 0;
340   unsigned NumFileBytesMapped = 0;
341   for (std::set<ContentCache>::const_iterator I =
342        FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
343     NumLineNumsComputed += I->SourceLineCache != 0;
344     NumFileBytesMapped  += I->Buffer->getBufferSize();
345   }
346 
347   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
348              << NumLineNumsComputed << " files with line #'s computed.\n";
349 }
350 
351 //===----------------------------------------------------------------------===//
352 // Serialization.
353 //===----------------------------------------------------------------------===//
354 
355 void ContentCache::Emit(llvm::Serializer& S) const {
356   S.FlushRecord();
357   S.EmitPtr(this);
358 
359   if (Entry) {
360     llvm::sys::Path Fname(Buffer->getBufferIdentifier());
361 
362     if (Fname.isAbsolute())
363       S.EmitCStr(Fname.c_str());
364     else {
365       // Create an absolute path.
366       // FIXME: This will potentially contain ".." and "." in the path.
367       llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
368       path.appendComponent(Fname.c_str());
369       S.EmitCStr(path.c_str());
370     }
371   }
372   else {
373     const char* p = Buffer->getBufferStart();
374     const char* e = Buffer->getBufferEnd();
375 
376     S.EmitInt(e-p);
377 
378     for ( ; p != e; ++p)
379       S.EmitInt(*p);
380   }
381 
382   S.FlushRecord();
383 }
384 
385 void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
386                                        SourceManager& SMgr,
387                                        FileManager* FMgr,
388                                        std::vector<char>& Buf) {
389   if (FMgr) {
390     llvm::SerializedPtrID PtrID = D.ReadPtrID();
391     D.ReadCStr(Buf,false);
392 
393     // Create/fetch the FileEntry.
394     const char* start = &Buf[0];
395     const FileEntry* E = FMgr->getFile(start,start+Buf.size());
396 
397     // FIXME: Ideally we want a lazy materialization of the ContentCache
398     //  anyway, because we don't want to read in source files unless this
399     //  is absolutely needed.
400     if (!E)
401       D.RegisterPtr(PtrID,NULL);
402     else
403       // Get the ContextCache object and register it with the deserializer.
404       D.RegisterPtr(PtrID,SMgr.getContentCache(E));
405   }
406   else {
407     // Register the ContextCache object with the deserializer.
408     SMgr.MemBufferInfos.push_back(ContentCache());
409     ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
410     D.RegisterPtr(&Entry);
411 
412     // Create the buffer.
413     unsigned Size = D.ReadInt();
414     Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
415 
416     // Read the contents of the buffer.
417     char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
418     for (unsigned i = 0; i < Size ; ++i)
419       p[i] = D.ReadInt();
420   }
421 }
422 
423 void FileIDInfo::Emit(llvm::Serializer& S) const {
424   S.Emit(IncludeLoc);
425   S.EmitInt(ChunkNo);
426   S.EmitPtr(Content);
427 }
428 
429 FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
430   FileIDInfo I;
431   I.IncludeLoc = SourceLocation::ReadVal(D);
432   I.ChunkNo = D.ReadInt();
433   D.ReadPtr(I.Content,false);
434   return I;
435 }
436 
437 void MacroIDInfo::Emit(llvm::Serializer& S) const {
438   S.Emit(VirtualLoc);
439   S.Emit(PhysicalLoc);
440 }
441 
442 MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
443   MacroIDInfo I;
444   I.VirtualLoc = SourceLocation::ReadVal(D);
445   I.PhysicalLoc = SourceLocation::ReadVal(D);
446   return I;
447 }
448 
449 void SourceManager::Emit(llvm::Serializer& S) const {
450   S.EnterBlock();
451   S.EmitPtr(this);
452   S.EmitInt(MainFileID);
453 
454   // Emit: FileInfos.  Just emit the file name.
455   S.EnterBlock();
456 
457   std::for_each(FileInfos.begin(),FileInfos.end(),
458                 S.MakeEmitter<ContentCache>());
459 
460   S.ExitBlock();
461 
462   // Emit: MemBufferInfos
463   S.EnterBlock();
464 
465   std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
466                 S.MakeEmitter<ContentCache>());
467 
468   S.ExitBlock();
469 
470   // Emit: FileIDs
471   S.EmitInt(FileIDs.size());
472   std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
473 
474   // Emit: MacroIDs
475   S.EmitInt(MacroIDs.size());
476   std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
477 
478   S.ExitBlock();
479 }
480 
481 SourceManager*
482 SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
483   SourceManager *M = new SourceManager();
484   D.RegisterPtr(M);
485 
486   // Read: the FileID of the main source file of the translation unit.
487   M->MainFileID = D.ReadInt();
488 
489   std::vector<char> Buf;
490 
491   { // Read: FileInfos.
492     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
493     while (!D.FinishedBlock(BLoc))
494     ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
495   }
496 
497   { // Read: MemBufferInfos.
498     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
499     while (!D.FinishedBlock(BLoc))
500     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
501   }
502 
503   // Read: FileIDs.
504   unsigned Size = D.ReadInt();
505   M->FileIDs.reserve(Size);
506   for (; Size > 0 ; --Size)
507     M->FileIDs.push_back(FileIDInfo::ReadVal(D));
508 
509   // Read: MacroIDs.
510   Size = D.ReadInt();
511   M->MacroIDs.reserve(Size);
512   for (; Size > 0 ; --Size)
513     M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
514 
515   return M;
516 }
517