xref: /llvm-project/clang/lib/Basic/SourceManager.cpp (revision 93eea6e1a00c8409a9dfaf72c0494b3a00be4ee8)
1 //===--- SourceManager.cpp - Track and cache source files -----------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file implements the SourceManager interface.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/SourceManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "llvm/Config/config.h"
17 #include "llvm/Support/Compiler.h"
18 #include "llvm/Support/MemoryBuffer.h"
19 #include "llvm/System/Path.h"
20 #include "llvm/Bitcode/Serialize.h"
21 #include "llvm/Bitcode/Deserialize.h"
22 #include "llvm/Support/Streams.h"
23 #include <algorithm>
24 #include <fcntl.h>
25 using namespace clang;
26 using namespace SrcMgr;
27 using llvm::MemoryBuffer;
28 
29 ContentCache::~ContentCache() {
30   delete Buffer;
31   delete [] SourceLineCache;
32 }
33 
34 // FIXME: REMOVE THESE
35 #include <unistd.h>
36 #include <sys/types.h>
37 #if !defined(_MSC_VER) && !defined(__MINGW32__)
38 #include <sys/uio.h>
39 #include <sys/fcntl.h>
40 #else
41 #include <io.h>
42 #endif
43 #include <cerrno>
44 
45 static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
46 #if 0
47   // FIXME: Reintroduce this and zap this function once the common llvm stuff
48   // is fast for the small case.
49   return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
50                                FileEnt->getSize());
51 #endif
52 
53   // If the file is larger than some threshold, use 'read', otherwise use mmap.
54   if (FileEnt->getSize() >= 4096*12)
55     return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
56                                  0, FileEnt->getSize());
57 
58   MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
59                                                          FileEnt->getName());
60   char *BufPtr = const_cast<char*>(SB->getBufferStart());
61 
62 #if defined(LLVM_ON_WIN32)
63   int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY);
64 #else
65   int FD = ::open(FileEnt->getName(), O_RDONLY);
66 #endif
67   if (FD == -1) {
68     delete SB;
69     return 0;
70   }
71 
72   unsigned BytesLeft = FileEnt->getSize();
73   while (BytesLeft) {
74     ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
75     if (NumRead != -1) {
76       BytesLeft -= NumRead;
77       BufPtr += NumRead;
78     } else if (errno == EINTR) {
79       // try again
80     } else {
81       // error reading.
82       close(FD);
83       delete SB;
84       return 0;
85     }
86   }
87   close(FD);
88 
89   return SB;
90 }
91 
92 
93 /// getFileInfo - Create or return a cached FileInfo for the specified file.
94 ///
95 const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) {
96 
97   assert(FileEnt && "Didn't specify a file entry to use?");
98   // Do we already have information about this file?
99   std::set<ContentCache>::iterator I =
100     FileInfos.lower_bound(ContentCache(FileEnt));
101 
102   if (I != FileInfos.end() && I->Entry == FileEnt)
103     return &*I;
104 
105   // Nope, get information.
106   const MemoryBuffer *File = ReadFileFast(FileEnt);
107   if (File == 0)
108     return 0;
109 
110   ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt));
111 
112   Entry.Buffer = File;
113   Entry.SourceLineCache = 0;
114   Entry.NumLines = 0;
115   return &Entry;
116 }
117 
118 
119 /// createMemBufferContentCache - Create a new ContentCache for the specified
120 ///  memory buffer.  This does no caching.
121 const ContentCache*
122 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) {
123   // Add a new ContentCache to the MemBufferInfos list and return it.  We
124   // must default construct the object first that the instance actually
125   // stored within MemBufferInfos actually owns the Buffer, and not any
126   // temporary we would use in the call to "push_back".
127   MemBufferInfos.push_back(ContentCache());
128   ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back());
129   Entry.Buffer = Buffer;
130   return &Entry;
131 }
132 
133 
134 /// createFileID - Create a new fileID for the specified ContentCache and
135 /// include position.  This works regardless of whether the ContentCache
136 /// corresponds to a file or some other input source.
137 unsigned SourceManager::createFileID(const ContentCache *File,
138                                      SourceLocation IncludePos) {
139   // If FileEnt is really large (e.g. it's a large .i file), we may not be able
140   // to fit an arbitrary position in the file in the FilePos field.  To handle
141   // this, we create one FileID for each chunk of the file that fits in a
142   // FilePos field.
143   unsigned FileSize = File->Buffer->getBufferSize();
144   if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
145     FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File));
146     assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
147            "Ran out of file ID's!");
148     return FileIDs.size();
149   }
150 
151   // Create one FileID for each chunk of the file.
152   unsigned Result = FileIDs.size()+1;
153 
154   unsigned ChunkNo = 0;
155   while (1) {
156     FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File));
157 
158     if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
159     FileSize -= (1 << SourceLocation::FilePosBits);
160   }
161 
162   assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
163          "Ran out of file ID's!");
164   return Result;
165 }
166 
167 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
168 /// that a token from physloc PhysLoc should actually be referenced from
169 /// InstantiationLoc.
170 SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
171                                                   SourceLocation InstantLoc) {
172   // The specified source location may be a mapped location, due to a macro
173   // instantiation or #line directive.  Strip off this information to find out
174   // where the characters are actually located.
175   PhysLoc = getPhysicalLoc(PhysLoc);
176 
177   // Resolve InstantLoc down to a real logical location.
178   InstantLoc = getLogicalLoc(InstantLoc);
179 
180 
181   // If the last macro id is close to the currently requested location, try to
182   // reuse it.  This implements a small cache.
183   for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){
184     MacroIDInfo &LastOne = MacroIDs[i];
185 
186     // The instanitation point and source physloc have to exactly match to reuse
187     // (for now).  We could allow "nearby" instantiations in the future.
188     if (LastOne.getVirtualLoc() != InstantLoc ||
189         LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID())
190       continue;
191 
192     // Check to see if the physloc of the token came from near enough to reuse.
193     int PhysDelta = PhysLoc.getRawFilePos() -
194                     LastOne.getPhysicalLoc().getRawFilePos();
195     if (SourceLocation::isValidMacroPhysOffs(PhysDelta))
196       return SourceLocation::getMacroLoc(i, PhysDelta);
197   }
198 
199 
200   MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc));
201   return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0);
202 }
203 
204 /// getBufferData - Return a pointer to the start and end of the character
205 /// data for the specified FileID.
206 std::pair<const char*, const char*>
207 SourceManager::getBufferData(unsigned FileID) const {
208   const llvm::MemoryBuffer *Buf = getBuffer(FileID);
209   return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd());
210 }
211 
212 
213 /// getCharacterData - Return a pointer to the start of the specified location
214 /// in the appropriate MemoryBuffer.
215 const char *SourceManager::getCharacterData(SourceLocation SL) const {
216   // Note that this is a hot function in the getSpelling() path, which is
217   // heavily used by -E mode.
218   SL = getPhysicalLoc(SL);
219 
220   return getContentCache(SL.getFileID())->Buffer->getBufferStart() +
221          getFullFilePos(SL);
222 }
223 
224 
225 /// getColumnNumber - Return the column # for the specified file position.
226 /// this is significantly cheaper to compute than the line number.  This returns
227 /// zero if the column number isn't known.
228 unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
229   unsigned FileID = Loc.getFileID();
230   if (FileID == 0) return 0;
231 
232   unsigned FilePos = getFullFilePos(Loc);
233   const MemoryBuffer *Buffer = getBuffer(FileID);
234   const char *Buf = Buffer->getBufferStart();
235 
236   unsigned LineStart = FilePos;
237   while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
238     --LineStart;
239   return FilePos-LineStart+1;
240 }
241 
242 /// getSourceName - This method returns the name of the file or buffer that
243 /// the SourceLocation specifies.  This can be modified with #line directives,
244 /// etc.
245 const char *SourceManager::getSourceName(SourceLocation Loc) const {
246   unsigned FileID = Loc.getFileID();
247   if (FileID == 0) return "";
248   return getContentCache(FileID)->Buffer->getBufferIdentifier();
249 }
250 
251 static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE;
252 static void ComputeLineNumbers(ContentCache* FI) {
253   const MemoryBuffer *Buffer = FI->Buffer;
254 
255   // Find the file offsets of all of the *physical* source lines.  This does
256   // not look at trigraphs, escaped newlines, or anything else tricky.
257   std::vector<unsigned> LineOffsets;
258 
259   // Line #1 starts at char 0.
260   LineOffsets.push_back(0);
261 
262   const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
263   const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
264   unsigned Offs = 0;
265   while (1) {
266     // Skip over the contents of the line.
267     // TODO: Vectorize this?  This is very performance sensitive for programs
268     // with lots of diagnostics and in -E mode.
269     const unsigned char *NextBuf = (const unsigned char *)Buf;
270     while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
271       ++NextBuf;
272     Offs += NextBuf-Buf;
273     Buf = NextBuf;
274 
275     if (Buf[0] == '\n' || Buf[0] == '\r') {
276       // If this is \n\r or \r\n, skip both characters.
277       if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
278         ++Offs, ++Buf;
279       ++Offs, ++Buf;
280       LineOffsets.push_back(Offs);
281     } else {
282       // Otherwise, this is a null.  If end of file, exit.
283       if (Buf == End) break;
284       // Otherwise, skip the null.
285       ++Offs, ++Buf;
286     }
287   }
288 
289   // Copy the offsets into the FileInfo structure.
290   FI->NumLines = LineOffsets.size();
291   FI->SourceLineCache = new unsigned[LineOffsets.size()];
292   std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache);
293 }
294 
295 /// getLineNumber - Given a SourceLocation, return the physical line number
296 /// for the position indicated.  This requires building and caching a table of
297 /// line offsets for the MemoryBuffer, so this is not cheap: use only when
298 /// about to emit a diagnostic.
299 unsigned SourceManager::getLineNumber(SourceLocation Loc) {
300   unsigned FileID = Loc.getFileID();
301   if (FileID == 0) return 0;
302 
303   ContentCache* Content;
304 
305   if (LastLineNoFileIDQuery == FileID)
306     Content = LastLineNoContentCache;
307   else
308     Content = const_cast<ContentCache*>(getContentCache(FileID));
309 
310   // If this is the first use of line information for this buffer, compute the
311   /// SourceLineCache for it on demand.
312   if (Content->SourceLineCache == 0)
313     ComputeLineNumbers(Content);
314 
315   // Okay, we know we have a line number table.  Do a binary search to find the
316   // line number that this character position lands on.
317   unsigned *SourceLineCache = Content->SourceLineCache;
318   unsigned *SourceLineCacheStart = SourceLineCache;
319   unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines;
320 
321   unsigned QueriedFilePos = getFullFilePos(Loc)+1;
322 
323   // If the previous query was to the same file, we know both the file pos from
324   // that query and the line number returned.  This allows us to narrow the
325   // search space from the entire file to something near the match.
326   if (LastLineNoFileIDQuery == FileID) {
327     if (QueriedFilePos >= LastLineNoFilePos) {
328       SourceLineCache = SourceLineCache+LastLineNoResult-1;
329 
330       // The query is likely to be nearby the previous one.  Here we check to
331       // see if it is within 5, 10 or 20 lines.  It can be far away in cases
332       // where big comment blocks and vertical whitespace eat up lines but
333       // contribute no tokens.
334       if (SourceLineCache+5 < SourceLineCacheEnd) {
335         if (SourceLineCache[5] > QueriedFilePos)
336           SourceLineCacheEnd = SourceLineCache+5;
337         else if (SourceLineCache+10 < SourceLineCacheEnd) {
338           if (SourceLineCache[10] > QueriedFilePos)
339             SourceLineCacheEnd = SourceLineCache+10;
340           else if (SourceLineCache+20 < SourceLineCacheEnd) {
341             if (SourceLineCache[20] > QueriedFilePos)
342               SourceLineCacheEnd = SourceLineCache+20;
343           }
344         }
345       }
346     } else {
347       SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1;
348     }
349   }
350 
351   // If the spread is large, do a "radix" test as our initial guess, based on
352   // the assumption that lines average to approximately the same length.
353   // NOTE: This is currently disabled, as it does not appear to be profitable in
354   // initial measurements.
355   if (0 && SourceLineCacheEnd-SourceLineCache > 20) {
356     unsigned FileLen = Content->SourceLineCache[Content->NumLines-1];
357 
358     // Take a stab at guessing where it is.
359     unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen;
360 
361     // Check for -10 and +10 lines.
362     unsigned LowerBound = std::max(int(ApproxPos-10), 0);
363     unsigned UpperBound = std::min(ApproxPos+10, FileLen);
364 
365     // If the computed lower bound is less than the query location, move it in.
366     if (SourceLineCache < SourceLineCacheStart+LowerBound &&
367         SourceLineCacheStart[LowerBound] < QueriedFilePos)
368       SourceLineCache = SourceLineCacheStart+LowerBound;
369 
370     // If the computed upper bound is greater than the query location, move it.
371     if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound &&
372         SourceLineCacheStart[UpperBound] >= QueriedFilePos)
373       SourceLineCacheEnd = SourceLineCacheStart+UpperBound;
374   }
375 
376   unsigned *Pos
377     = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos);
378   unsigned LineNo = Pos-SourceLineCacheStart;
379 
380   LastLineNoFileIDQuery = FileID;
381   LastLineNoContentCache = Content;
382   LastLineNoFilePos = QueriedFilePos;
383   LastLineNoResult = LineNo;
384   return LineNo;
385 }
386 
387 /// PrintStats - Print statistics to stderr.
388 ///
389 void SourceManager::PrintStats() const {
390   llvm::cerr << "\n*** Source Manager Stats:\n";
391   llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
392              << " mem buffers mapped, " << FileIDs.size()
393              << " file ID's allocated.\n";
394   llvm::cerr << "  " << FileIDs.size() << " normal buffer FileID's, "
395              << MacroIDs.size() << " macro expansion FileID's.\n";
396 
397   unsigned NumLineNumsComputed = 0;
398   unsigned NumFileBytesMapped = 0;
399   for (std::set<ContentCache>::const_iterator I =
400        FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
401     NumLineNumsComputed += I->SourceLineCache != 0;
402     NumFileBytesMapped  += I->Buffer->getBufferSize();
403   }
404 
405   llvm::cerr << NumFileBytesMapped << " bytes of files mapped, "
406              << NumLineNumsComputed << " files with line #'s computed.\n";
407 }
408 
409 //===----------------------------------------------------------------------===//
410 // Serialization.
411 //===----------------------------------------------------------------------===//
412 
413 void ContentCache::Emit(llvm::Serializer& S) const {
414   S.FlushRecord();
415   S.EmitPtr(this);
416 
417   if (Entry) {
418     llvm::sys::Path Fname(Buffer->getBufferIdentifier());
419 
420     if (Fname.isAbsolute())
421       S.EmitCStr(Fname.c_str());
422     else {
423       // Create an absolute path.
424       // FIXME: This will potentially contain ".." and "." in the path.
425       llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory();
426       path.appendComponent(Fname.c_str());
427       S.EmitCStr(path.c_str());
428     }
429   }
430   else {
431     const char* p = Buffer->getBufferStart();
432     const char* e = Buffer->getBufferEnd();
433 
434     S.EmitInt(e-p);
435 
436     for ( ; p != e; ++p)
437       S.EmitInt(*p);
438   }
439 
440   S.FlushRecord();
441 }
442 
443 void ContentCache::ReadToSourceManager(llvm::Deserializer& D,
444                                        SourceManager& SMgr,
445                                        FileManager* FMgr,
446                                        std::vector<char>& Buf) {
447   if (FMgr) {
448     llvm::SerializedPtrID PtrID = D.ReadPtrID();
449     D.ReadCStr(Buf,false);
450 
451     // Create/fetch the FileEntry.
452     const char* start = &Buf[0];
453     const FileEntry* E = FMgr->getFile(start,start+Buf.size());
454 
455     // FIXME: Ideally we want a lazy materialization of the ContentCache
456     //  anyway, because we don't want to read in source files unless this
457     //  is absolutely needed.
458     if (!E)
459       D.RegisterPtr(PtrID,NULL);
460     else
461       // Get the ContextCache object and register it with the deserializer.
462       D.RegisterPtr(PtrID,SMgr.getContentCache(E));
463   }
464   else {
465     // Register the ContextCache object with the deserializer.
466     SMgr.MemBufferInfos.push_back(ContentCache());
467     ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back());
468     D.RegisterPtr(&Entry);
469 
470     // Create the buffer.
471     unsigned Size = D.ReadInt();
472     Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size);
473 
474     // Read the contents of the buffer.
475     char* p = const_cast<char*>(Entry.Buffer->getBufferStart());
476     for (unsigned i = 0; i < Size ; ++i)
477       p[i] = D.ReadInt();
478   }
479 }
480 
481 void FileIDInfo::Emit(llvm::Serializer& S) const {
482   S.Emit(IncludeLoc);
483   S.EmitInt(ChunkNo);
484   S.EmitPtr(Content);
485 }
486 
487 FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) {
488   FileIDInfo I;
489   I.IncludeLoc = SourceLocation::ReadVal(D);
490   I.ChunkNo = D.ReadInt();
491   D.ReadPtr(I.Content,false);
492   return I;
493 }
494 
495 void MacroIDInfo::Emit(llvm::Serializer& S) const {
496   S.Emit(VirtualLoc);
497   S.Emit(PhysicalLoc);
498 }
499 
500 MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) {
501   MacroIDInfo I;
502   I.VirtualLoc = SourceLocation::ReadVal(D);
503   I.PhysicalLoc = SourceLocation::ReadVal(D);
504   return I;
505 }
506 
507 void SourceManager::Emit(llvm::Serializer& S) const {
508   S.EnterBlock();
509   S.EmitPtr(this);
510   S.EmitInt(MainFileID);
511 
512   // Emit: FileInfos.  Just emit the file name.
513   S.EnterBlock();
514 
515   std::for_each(FileInfos.begin(),FileInfos.end(),
516                 S.MakeEmitter<ContentCache>());
517 
518   S.ExitBlock();
519 
520   // Emit: MemBufferInfos
521   S.EnterBlock();
522 
523   std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(),
524                 S.MakeEmitter<ContentCache>());
525 
526   S.ExitBlock();
527 
528   // Emit: FileIDs
529   S.EmitInt(FileIDs.size());
530   std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>());
531 
532   // Emit: MacroIDs
533   S.EmitInt(MacroIDs.size());
534   std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>());
535 
536   S.ExitBlock();
537 }
538 
539 SourceManager*
540 SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){
541   SourceManager *M = new SourceManager();
542   D.RegisterPtr(M);
543 
544   // Read: the FileID of the main source file of the translation unit.
545   M->MainFileID = D.ReadInt();
546 
547   std::vector<char> Buf;
548 
549   { // Read: FileInfos.
550     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
551     while (!D.FinishedBlock(BLoc))
552     ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf);
553   }
554 
555   { // Read: MemBufferInfos.
556     llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation();
557     while (!D.FinishedBlock(BLoc))
558     ContentCache::ReadToSourceManager(D,*M,NULL,Buf);
559   }
560 
561   // Read: FileIDs.
562   unsigned Size = D.ReadInt();
563   M->FileIDs.reserve(Size);
564   for (; Size > 0 ; --Size)
565     M->FileIDs.push_back(FileIDInfo::ReadVal(D));
566 
567   // Read: MacroIDs.
568   Size = D.ReadInt();
569   M->MacroIDs.reserve(Size);
570   for (; Size > 0 ; --Size)
571     M->MacroIDs.push_back(MacroIDInfo::ReadVal(D));
572 
573   return M;
574 }
575