1 //===--- SourceManager.cpp - Track and cache source files -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the SourceManager interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Basic/SourceManager.h" 15 #include "clang/Basic/FileManager.h" 16 #include "llvm/Support/Compiler.h" 17 #include "llvm/Support/MemoryBuffer.h" 18 #include "llvm/System/Path.h" 19 #include "llvm/Bitcode/Serialize.h" 20 #include "llvm/Bitcode/Deserialize.h" 21 #include "llvm/Support/Streams.h" 22 #include <algorithm> 23 using namespace clang; 24 using namespace SrcMgr; 25 using llvm::MemoryBuffer; 26 27 ContentCache::~ContentCache() { 28 delete Buffer; 29 delete [] SourceLineCache; 30 } 31 32 /// getFileInfo - Create or return a cached FileInfo for the specified file. 33 /// 34 const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { 35 36 assert(FileEnt && "Didn't specify a file entry to use?"); 37 // Do we already have information about this file? 38 std::set<ContentCache>::iterator I = 39 FileInfos.lower_bound(ContentCache(FileEnt)); 40 41 if (I != FileInfos.end() && I->Entry == FileEnt) 42 return &*I; 43 44 // Nope, get information. 45 const MemoryBuffer *File = 46 MemoryBuffer::getFile(FileEnt->getName(), 0, FileEnt->getSize()); 47 if (File == 0) 48 return 0; 49 50 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt)); 51 52 Entry.Buffer = File; 53 Entry.SourceLineCache = 0; 54 Entry.NumLines = 0; 55 return &Entry; 56 } 57 58 59 /// createMemBufferContentCache - Create a new ContentCache for the specified 60 /// memory buffer. This does no caching. 61 const ContentCache* 62 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 63 // Add a new ContentCache to the MemBufferInfos list and return it. We 64 // must default construct the object first that the instance actually 65 // stored within MemBufferInfos actually owns the Buffer, and not any 66 // temporary we would use in the call to "push_back". 67 MemBufferInfos.push_back(ContentCache()); 68 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back()); 69 Entry.Buffer = Buffer; 70 return &Entry; 71 } 72 73 74 /// createFileID - Create a new fileID for the specified ContentCache and 75 /// include position. This works regardless of whether the ContentCache 76 /// corresponds to a file or some other input source. 77 unsigned SourceManager::createFileID(const ContentCache *File, 78 SourceLocation IncludePos) { 79 // If FileEnt is really large (e.g. it's a large .i file), we may not be able 80 // to fit an arbitrary position in the file in the FilePos field. To handle 81 // this, we create one FileID for each chunk of the file that fits in a 82 // FilePos field. 83 unsigned FileSize = File->Buffer->getBufferSize(); 84 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { 85 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File)); 86 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 87 "Ran out of file ID's!"); 88 return FileIDs.size(); 89 } 90 91 // Create one FileID for each chunk of the file. 92 unsigned Result = FileIDs.size()+1; 93 94 unsigned ChunkNo = 0; 95 while (1) { 96 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File)); 97 98 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; 99 FileSize -= (1 << SourceLocation::FilePosBits); 100 } 101 102 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 103 "Ran out of file ID's!"); 104 return Result; 105 } 106 107 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 108 /// that a token from physloc PhysLoc should actually be referenced from 109 /// InstantiationLoc. 110 SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc, 111 SourceLocation InstantLoc) { 112 // The specified source location may be a mapped location, due to a macro 113 // instantiation or #line directive. Strip off this information to find out 114 // where the characters are actually located. 115 PhysLoc = getPhysicalLoc(PhysLoc); 116 117 // Resolve InstantLoc down to a real logical location. 118 InstantLoc = getLogicalLoc(InstantLoc); 119 120 121 // If the last macro id is close to the currently requested location, try to 122 // reuse it. This implements a small cache. 123 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ 124 MacroIDInfo &LastOne = MacroIDs[i]; 125 126 // The instanitation point and source physloc have to exactly match to reuse 127 // (for now). We could allow "nearby" instantiations in the future. 128 if (LastOne.getVirtualLoc() != InstantLoc || 129 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID()) 130 continue; 131 132 // Check to see if the physloc of the token came from near enough to reuse. 133 int PhysDelta = PhysLoc.getRawFilePos() - 134 LastOne.getPhysicalLoc().getRawFilePos(); 135 if (SourceLocation::isValidMacroPhysOffs(PhysDelta)) 136 return SourceLocation::getMacroLoc(i, PhysDelta); 137 } 138 139 140 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc)); 141 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); 142 } 143 144 /// getBufferData - Return a pointer to the start and end of the character 145 /// data for the specified FileID. 146 std::pair<const char*, const char*> 147 SourceManager::getBufferData(unsigned FileID) const { 148 const llvm::MemoryBuffer *Buf = getBuffer(FileID); 149 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 150 } 151 152 153 /// getCharacterData - Return a pointer to the start of the specified location 154 /// in the appropriate MemoryBuffer. 155 const char *SourceManager::getCharacterData(SourceLocation SL) const { 156 // Note that this is a hot function in the getSpelling() path, which is 157 // heavily used by -E mode. 158 SL = getPhysicalLoc(SL); 159 160 return getContentCache(SL.getFileID())->Buffer->getBufferStart() + 161 getFullFilePos(SL); 162 } 163 164 165 /// getColumnNumber - Return the column # for the specified file position. 166 /// this is significantly cheaper to compute than the line number. This returns 167 /// zero if the column number isn't known. 168 unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { 169 unsigned FileID = Loc.getFileID(); 170 if (FileID == 0) return 0; 171 172 unsigned FilePos = getFullFilePos(Loc); 173 const MemoryBuffer *Buffer = getBuffer(FileID); 174 const char *Buf = Buffer->getBufferStart(); 175 176 unsigned LineStart = FilePos; 177 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 178 --LineStart; 179 return FilePos-LineStart+1; 180 } 181 182 /// getSourceName - This method returns the name of the file or buffer that 183 /// the SourceLocation specifies. This can be modified with #line directives, 184 /// etc. 185 const char *SourceManager::getSourceName(SourceLocation Loc) const { 186 unsigned FileID = Loc.getFileID(); 187 if (FileID == 0) return ""; 188 return getContentCache(FileID)->Buffer->getBufferIdentifier(); 189 } 190 191 static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; 192 static void ComputeLineNumbers(ContentCache* FI) { 193 const MemoryBuffer *Buffer = FI->Buffer; 194 195 // Find the file offsets of all of the *physical* source lines. This does 196 // not look at trigraphs, escaped newlines, or anything else tricky. 197 std::vector<unsigned> LineOffsets; 198 199 // Line #1 starts at char 0. 200 LineOffsets.push_back(0); 201 202 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 203 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 204 unsigned Offs = 0; 205 while (1) { 206 // Skip over the contents of the line. 207 // TODO: Vectorize this? This is very performance sensitive for programs 208 // with lots of diagnostics and in -E mode. 209 const unsigned char *NextBuf = (const unsigned char *)Buf; 210 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 211 ++NextBuf; 212 Offs += NextBuf-Buf; 213 Buf = NextBuf; 214 215 if (Buf[0] == '\n' || Buf[0] == '\r') { 216 // If this is \n\r or \r\n, skip both characters. 217 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 218 ++Offs, ++Buf; 219 ++Offs, ++Buf; 220 LineOffsets.push_back(Offs); 221 } else { 222 // Otherwise, this is a null. If end of file, exit. 223 if (Buf == End) break; 224 // Otherwise, skip the null. 225 ++Offs, ++Buf; 226 } 227 } 228 229 // Copy the offsets into the FileInfo structure. 230 FI->NumLines = LineOffsets.size(); 231 FI->SourceLineCache = new unsigned[LineOffsets.size()]; 232 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 233 } 234 235 /// getLineNumber - Given a SourceLocation, return the physical line number 236 /// for the position indicated. This requires building and caching a table of 237 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 238 /// about to emit a diagnostic. 239 unsigned SourceManager::getLineNumber(SourceLocation Loc) { 240 unsigned FileID = Loc.getFileID(); 241 if (FileID == 0) return 0; 242 243 ContentCache* Content; 244 245 if (LastLineNoFileIDQuery == FileID) 246 Content = LastLineNoContentCache; 247 else 248 Content = const_cast<ContentCache*>(getContentCache(FileID)); 249 250 // If this is the first use of line information for this buffer, compute the 251 /// SourceLineCache for it on demand. 252 if (Content->SourceLineCache == 0) 253 ComputeLineNumbers(Content); 254 255 // Okay, we know we have a line number table. Do a binary search to find the 256 // line number that this character position lands on. 257 unsigned *SourceLineCache = Content->SourceLineCache; 258 unsigned *SourceLineCacheStart = SourceLineCache; 259 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 260 261 unsigned QueriedFilePos = getFullFilePos(Loc)+1; 262 263 // If the previous query was to the same file, we know both the file pos from 264 // that query and the line number returned. This allows us to narrow the 265 // search space from the entire file to something near the match. 266 if (LastLineNoFileIDQuery == FileID) { 267 if (QueriedFilePos >= LastLineNoFilePos) { 268 SourceLineCache = SourceLineCache+LastLineNoResult-1; 269 270 // The query is likely to be nearby the previous one. Here we check to 271 // see if it is within 5, 10 or 20 lines. It can be far away in cases 272 // where big comment blocks and vertical whitespace eat up lines but 273 // contribute no tokens. 274 if (SourceLineCache+5 < SourceLineCacheEnd) { 275 if (SourceLineCache[5] > QueriedFilePos) 276 SourceLineCacheEnd = SourceLineCache+5; 277 else if (SourceLineCache+10 < SourceLineCacheEnd) { 278 if (SourceLineCache[10] > QueriedFilePos) 279 SourceLineCacheEnd = SourceLineCache+10; 280 else if (SourceLineCache+20 < SourceLineCacheEnd) { 281 if (SourceLineCache[20] > QueriedFilePos) 282 SourceLineCacheEnd = SourceLineCache+20; 283 } 284 } 285 } 286 } else { 287 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 288 } 289 } 290 291 // If the spread is large, do a "radix" test as our initial guess, based on 292 // the assumption that lines average to approximately the same length. 293 // NOTE: This is currently disabled, as it does not appear to be profitable in 294 // initial measurements. 295 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 296 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 297 298 // Take a stab at guessing where it is. 299 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 300 301 // Check for -10 and +10 lines. 302 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 303 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 304 305 // If the computed lower bound is less than the query location, move it in. 306 if (SourceLineCache < SourceLineCacheStart+LowerBound && 307 SourceLineCacheStart[LowerBound] < QueriedFilePos) 308 SourceLineCache = SourceLineCacheStart+LowerBound; 309 310 // If the computed upper bound is greater than the query location, move it. 311 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 312 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 313 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 314 } 315 316 unsigned *Pos 317 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 318 unsigned LineNo = Pos-SourceLineCacheStart; 319 320 LastLineNoFileIDQuery = FileID; 321 LastLineNoContentCache = Content; 322 LastLineNoFilePos = QueriedFilePos; 323 LastLineNoResult = LineNo; 324 return LineNo; 325 } 326 327 /// PrintStats - Print statistics to stderr. 328 /// 329 void SourceManager::PrintStats() const { 330 llvm::cerr << "\n*** Source Manager Stats:\n"; 331 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 332 << " mem buffers mapped, " << FileIDs.size() 333 << " file ID's allocated.\n"; 334 llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " 335 << MacroIDs.size() << " macro expansion FileID's.\n"; 336 337 unsigned NumLineNumsComputed = 0; 338 unsigned NumFileBytesMapped = 0; 339 for (std::set<ContentCache>::const_iterator I = 340 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 341 NumLineNumsComputed += I->SourceLineCache != 0; 342 NumFileBytesMapped += I->Buffer->getBufferSize(); 343 } 344 345 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 346 << NumLineNumsComputed << " files with line #'s computed.\n"; 347 } 348 349 //===----------------------------------------------------------------------===// 350 // Serialization. 351 //===----------------------------------------------------------------------===// 352 353 void ContentCache::Emit(llvm::Serializer& S) const { 354 S.FlushRecord(); 355 S.EmitPtr(this); 356 357 if (Entry) { 358 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 359 360 if (Fname.isAbsolute()) 361 S.EmitCStr(Fname.c_str()); 362 else { 363 // Create an absolute path. 364 // FIXME: This will potentially contain ".." and "." in the path. 365 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 366 path.appendComponent(Fname.c_str()); 367 S.EmitCStr(path.c_str()); 368 } 369 } 370 else { 371 const char* p = Buffer->getBufferStart(); 372 const char* e = Buffer->getBufferEnd(); 373 374 S.EmitInt(e-p); 375 376 for ( ; p != e; ++p) 377 S.EmitInt(*p); 378 } 379 380 S.FlushRecord(); 381 } 382 383 void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 384 SourceManager& SMgr, 385 FileManager* FMgr, 386 std::vector<char>& Buf) { 387 if (FMgr) { 388 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 389 D.ReadCStr(Buf,false); 390 391 // Create/fetch the FileEntry. 392 const char* start = &Buf[0]; 393 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 394 395 // FIXME: Ideally we want a lazy materialization of the ContentCache 396 // anyway, because we don't want to read in source files unless this 397 // is absolutely needed. 398 if (!E) 399 D.RegisterPtr(PtrID,NULL); 400 else 401 // Get the ContextCache object and register it with the deserializer. 402 D.RegisterPtr(PtrID,SMgr.getContentCache(E)); 403 } 404 else { 405 // Register the ContextCache object with the deserializer. 406 SMgr.MemBufferInfos.push_back(ContentCache()); 407 ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 408 D.RegisterPtr(&Entry); 409 410 // Create the buffer. 411 unsigned Size = D.ReadInt(); 412 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 413 414 // Read the contents of the buffer. 415 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 416 for (unsigned i = 0; i < Size ; ++i) 417 p[i] = D.ReadInt(); 418 } 419 } 420 421 void FileIDInfo::Emit(llvm::Serializer& S) const { 422 S.Emit(IncludeLoc); 423 S.EmitInt(ChunkNo); 424 S.EmitPtr(Content); 425 } 426 427 FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { 428 FileIDInfo I; 429 I.IncludeLoc = SourceLocation::ReadVal(D); 430 I.ChunkNo = D.ReadInt(); 431 D.ReadPtr(I.Content,false); 432 return I; 433 } 434 435 void MacroIDInfo::Emit(llvm::Serializer& S) const { 436 S.Emit(VirtualLoc); 437 S.Emit(PhysicalLoc); 438 } 439 440 MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { 441 MacroIDInfo I; 442 I.VirtualLoc = SourceLocation::ReadVal(D); 443 I.PhysicalLoc = SourceLocation::ReadVal(D); 444 return I; 445 } 446 447 void SourceManager::Emit(llvm::Serializer& S) const { 448 S.EnterBlock(); 449 S.EmitPtr(this); 450 S.EmitInt(MainFileID); 451 452 // Emit: FileInfos. Just emit the file name. 453 S.EnterBlock(); 454 455 std::for_each(FileInfos.begin(),FileInfos.end(), 456 S.MakeEmitter<ContentCache>()); 457 458 S.ExitBlock(); 459 460 // Emit: MemBufferInfos 461 S.EnterBlock(); 462 463 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 464 S.MakeEmitter<ContentCache>()); 465 466 S.ExitBlock(); 467 468 // Emit: FileIDs 469 S.EmitInt(FileIDs.size()); 470 std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); 471 472 // Emit: MacroIDs 473 S.EmitInt(MacroIDs.size()); 474 std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); 475 476 S.ExitBlock(); 477 } 478 479 SourceManager* 480 SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ 481 SourceManager *M = new SourceManager(); 482 D.RegisterPtr(M); 483 484 // Read: the FileID of the main source file of the translation unit. 485 M->MainFileID = D.ReadInt(); 486 487 std::vector<char> Buf; 488 489 { // Read: FileInfos. 490 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 491 while (!D.FinishedBlock(BLoc)) 492 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 493 } 494 495 { // Read: MemBufferInfos. 496 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 497 while (!D.FinishedBlock(BLoc)) 498 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 499 } 500 501 // Read: FileIDs. 502 unsigned Size = D.ReadInt(); 503 M->FileIDs.reserve(Size); 504 for (; Size > 0 ; --Size) 505 M->FileIDs.push_back(FileIDInfo::ReadVal(D)); 506 507 // Read: MacroIDs. 508 Size = D.ReadInt(); 509 M->MacroIDs.reserve(Size); 510 for (; Size > 0 ; --Size) 511 M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); 512 513 return M; 514 } 515