1 //===--- SourceManager.cpp - Track and cache source files -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the SourceManager interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Basic/SourceManager.h" 15 #include "clang/Basic/FileManager.h" 16 #include "llvm/Config/config.h" 17 #include "llvm/Support/Compiler.h" 18 #include "llvm/Support/MemoryBuffer.h" 19 #include "llvm/System/Path.h" 20 #include "llvm/Bitcode/Serialize.h" 21 #include "llvm/Bitcode/Deserialize.h" 22 #include "llvm/Support/Streams.h" 23 #include <algorithm> 24 #include <fcntl.h> 25 using namespace clang; 26 using namespace SrcMgr; 27 using llvm::MemoryBuffer; 28 29 ContentCache::~ContentCache() { 30 delete Buffer; 31 delete [] SourceLineCache; 32 } 33 34 // FIXME: REMOVE THESE 35 #include <unistd.h> 36 #include <sys/types.h> 37 #if !defined(_MSC_VER) && !defined(__MINGW32__) 38 #include <sys/uio.h> 39 #include <sys/fcntl.h> 40 #else 41 #include <io.h> 42 #endif 43 #include <cerrno> 44 45 static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) { 46 #if 0 47 // FIXME: Reintroduce this and zap this function once the common llvm stuff 48 // is fast for the small case. 49 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()), 50 FileEnt->getSize()); 51 #endif 52 53 // If the file is larger than some threshold, use 'read', otherwise use mmap. 54 if (FileEnt->getSize() >= 4096*12) 55 return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()), 56 0, FileEnt->getSize()); 57 58 MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(), 59 FileEnt->getName()); 60 char *BufPtr = const_cast<char*>(SB->getBufferStart()); 61 62 #if defined(LLVM_ON_WIN32) 63 int FD = ::open(FileEnt->getName(), O_RDONLY|O_BINARY); 64 #else 65 int FD = ::open(FileEnt->getName(), O_RDONLY); 66 #endif 67 if (FD == -1) { 68 delete SB; 69 return 0; 70 } 71 72 unsigned BytesLeft = FileEnt->getSize(); 73 while (BytesLeft) { 74 ssize_t NumRead = ::read(FD, BufPtr, BytesLeft); 75 if (NumRead != -1) { 76 BytesLeft -= NumRead; 77 BufPtr += NumRead; 78 } else if (errno == EINTR) { 79 // try again 80 } else { 81 // error reading. 82 close(FD); 83 delete SB; 84 return 0; 85 } 86 } 87 close(FD); 88 89 return SB; 90 } 91 92 93 /// getFileInfo - Create or return a cached FileInfo for the specified file. 94 /// 95 const ContentCache* SourceManager::getContentCache(const FileEntry *FileEnt) { 96 97 assert(FileEnt && "Didn't specify a file entry to use?"); 98 // Do we already have information about this file? 99 std::set<ContentCache>::iterator I = 100 FileInfos.lower_bound(ContentCache(FileEnt)); 101 102 if (I != FileInfos.end() && I->Entry == FileEnt) 103 return &*I; 104 105 // Nope, get information. 106 const MemoryBuffer *File = ReadFileFast(FileEnt); 107 if (File == 0) 108 return 0; 109 110 ContentCache& Entry = const_cast<ContentCache&>(*FileInfos.insert(I,FileEnt)); 111 112 Entry.Buffer = File; 113 Entry.SourceLineCache = 0; 114 Entry.NumLines = 0; 115 return &Entry; 116 } 117 118 119 /// createMemBufferContentCache - Create a new ContentCache for the specified 120 /// memory buffer. This does no caching. 121 const ContentCache* 122 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 123 // Add a new ContentCache to the MemBufferInfos list and return it. We 124 // must default construct the object first that the instance actually 125 // stored within MemBufferInfos actually owns the Buffer, and not any 126 // temporary we would use in the call to "push_back". 127 MemBufferInfos.push_back(ContentCache()); 128 ContentCache& Entry = const_cast<ContentCache&>(MemBufferInfos.back()); 129 Entry.Buffer = Buffer; 130 return &Entry; 131 } 132 133 134 /// createFileID - Create a new fileID for the specified ContentCache and 135 /// include position. This works regardless of whether the ContentCache 136 /// corresponds to a file or some other input source. 137 unsigned SourceManager::createFileID(const ContentCache *File, 138 SourceLocation IncludePos) { 139 // If FileEnt is really large (e.g. it's a large .i file), we may not be able 140 // to fit an arbitrary position in the file in the FilePos field. To handle 141 // this, we create one FileID for each chunk of the file that fits in a 142 // FilePos field. 143 unsigned FileSize = File->Buffer->getBufferSize(); 144 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) { 145 FileIDs.push_back(FileIDInfo::get(IncludePos, 0, File)); 146 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 147 "Ran out of file ID's!"); 148 return FileIDs.size(); 149 } 150 151 // Create one FileID for each chunk of the file. 152 unsigned Result = FileIDs.size()+1; 153 154 unsigned ChunkNo = 0; 155 while (1) { 156 FileIDs.push_back(FileIDInfo::get(IncludePos, ChunkNo++, File)); 157 158 if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break; 159 FileSize -= (1 << SourceLocation::FilePosBits); 160 } 161 162 assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) && 163 "Ran out of file ID's!"); 164 return Result; 165 } 166 167 /// getInstantiationLoc - Return a new SourceLocation that encodes the fact 168 /// that a token from physloc PhysLoc should actually be referenced from 169 /// InstantiationLoc. 170 SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc, 171 SourceLocation InstantLoc) { 172 // The specified source location may be a mapped location, due to a macro 173 // instantiation or #line directive. Strip off this information to find out 174 // where the characters are actually located. 175 PhysLoc = getPhysicalLoc(PhysLoc); 176 177 // Resolve InstantLoc down to a real logical location. 178 InstantLoc = getLogicalLoc(InstantLoc); 179 180 181 // If the last macro id is close to the currently requested location, try to 182 // reuse it. This implements a small cache. 183 for (int i = MacroIDs.size()-1, e = MacroIDs.size()-6; i >= 0 && i != e; --i){ 184 MacroIDInfo &LastOne = MacroIDs[i]; 185 186 // The instanitation point and source physloc have to exactly match to reuse 187 // (for now). We could allow "nearby" instantiations in the future. 188 if (LastOne.getVirtualLoc() != InstantLoc || 189 LastOne.getPhysicalLoc().getFileID() != PhysLoc.getFileID()) 190 continue; 191 192 // Check to see if the physloc of the token came from near enough to reuse. 193 int PhysDelta = PhysLoc.getRawFilePos() - 194 LastOne.getPhysicalLoc().getRawFilePos(); 195 if (SourceLocation::isValidMacroPhysOffs(PhysDelta)) 196 return SourceLocation::getMacroLoc(i, PhysDelta); 197 } 198 199 200 MacroIDs.push_back(MacroIDInfo::get(InstantLoc, PhysLoc)); 201 return SourceLocation::getMacroLoc(MacroIDs.size()-1, 0); 202 } 203 204 /// getBufferData - Return a pointer to the start and end of the character 205 /// data for the specified FileID. 206 std::pair<const char*, const char*> 207 SourceManager::getBufferData(unsigned FileID) const { 208 const llvm::MemoryBuffer *Buf = getBuffer(FileID); 209 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 210 } 211 212 213 /// getCharacterData - Return a pointer to the start of the specified location 214 /// in the appropriate MemoryBuffer. 215 const char *SourceManager::getCharacterData(SourceLocation SL) const { 216 // Note that this is a hot function in the getSpelling() path, which is 217 // heavily used by -E mode. 218 SL = getPhysicalLoc(SL); 219 220 return getContentCache(SL.getFileID())->Buffer->getBufferStart() + 221 getFullFilePos(SL); 222 } 223 224 225 /// getColumnNumber - Return the column # for the specified file position. 226 /// this is significantly cheaper to compute than the line number. This returns 227 /// zero if the column number isn't known. 228 unsigned SourceManager::getColumnNumber(SourceLocation Loc) const { 229 unsigned FileID = Loc.getFileID(); 230 if (FileID == 0) return 0; 231 232 unsigned FilePos = getFullFilePos(Loc); 233 const MemoryBuffer *Buffer = getBuffer(FileID); 234 const char *Buf = Buffer->getBufferStart(); 235 236 unsigned LineStart = FilePos; 237 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 238 --LineStart; 239 return FilePos-LineStart+1; 240 } 241 242 /// getSourceName - This method returns the name of the file or buffer that 243 /// the SourceLocation specifies. This can be modified with #line directives, 244 /// etc. 245 const char *SourceManager::getSourceName(SourceLocation Loc) const { 246 unsigned FileID = Loc.getFileID(); 247 if (FileID == 0) return ""; 248 return getContentCache(FileID)->Buffer->getBufferIdentifier(); 249 } 250 251 static void ComputeLineNumbers(ContentCache* FI) DISABLE_INLINE; 252 static void ComputeLineNumbers(ContentCache* FI) { 253 const MemoryBuffer *Buffer = FI->Buffer; 254 255 // Find the file offsets of all of the *physical* source lines. This does 256 // not look at trigraphs, escaped newlines, or anything else tricky. 257 std::vector<unsigned> LineOffsets; 258 259 // Line #1 starts at char 0. 260 LineOffsets.push_back(0); 261 262 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 263 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 264 unsigned Offs = 0; 265 while (1) { 266 // Skip over the contents of the line. 267 // TODO: Vectorize this? This is very performance sensitive for programs 268 // with lots of diagnostics and in -E mode. 269 const unsigned char *NextBuf = (const unsigned char *)Buf; 270 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 271 ++NextBuf; 272 Offs += NextBuf-Buf; 273 Buf = NextBuf; 274 275 if (Buf[0] == '\n' || Buf[0] == '\r') { 276 // If this is \n\r or \r\n, skip both characters. 277 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 278 ++Offs, ++Buf; 279 ++Offs, ++Buf; 280 LineOffsets.push_back(Offs); 281 } else { 282 // Otherwise, this is a null. If end of file, exit. 283 if (Buf == End) break; 284 // Otherwise, skip the null. 285 ++Offs, ++Buf; 286 } 287 } 288 289 // Copy the offsets into the FileInfo structure. 290 FI->NumLines = LineOffsets.size(); 291 FI->SourceLineCache = new unsigned[LineOffsets.size()]; 292 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 293 } 294 295 /// getLineNumber - Given a SourceLocation, return the physical line number 296 /// for the position indicated. This requires building and caching a table of 297 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 298 /// about to emit a diagnostic. 299 unsigned SourceManager::getLineNumber(SourceLocation Loc) { 300 unsigned FileID = Loc.getFileID(); 301 if (FileID == 0) return 0; 302 303 ContentCache* Content; 304 305 if (LastLineNoFileIDQuery == FileID) 306 Content = LastLineNoContentCache; 307 else 308 Content = const_cast<ContentCache*>(getContentCache(FileID)); 309 310 // If this is the first use of line information for this buffer, compute the 311 /// SourceLineCache for it on demand. 312 if (Content->SourceLineCache == 0) 313 ComputeLineNumbers(Content); 314 315 // Okay, we know we have a line number table. Do a binary search to find the 316 // line number that this character position lands on. 317 unsigned *SourceLineCache = Content->SourceLineCache; 318 unsigned *SourceLineCacheStart = SourceLineCache; 319 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 320 321 unsigned QueriedFilePos = getFullFilePos(Loc)+1; 322 323 // If the previous query was to the same file, we know both the file pos from 324 // that query and the line number returned. This allows us to narrow the 325 // search space from the entire file to something near the match. 326 if (LastLineNoFileIDQuery == FileID) { 327 if (QueriedFilePos >= LastLineNoFilePos) { 328 SourceLineCache = SourceLineCache+LastLineNoResult-1; 329 330 // The query is likely to be nearby the previous one. Here we check to 331 // see if it is within 5, 10 or 20 lines. It can be far away in cases 332 // where big comment blocks and vertical whitespace eat up lines but 333 // contribute no tokens. 334 if (SourceLineCache+5 < SourceLineCacheEnd) { 335 if (SourceLineCache[5] > QueriedFilePos) 336 SourceLineCacheEnd = SourceLineCache+5; 337 else if (SourceLineCache+10 < SourceLineCacheEnd) { 338 if (SourceLineCache[10] > QueriedFilePos) 339 SourceLineCacheEnd = SourceLineCache+10; 340 else if (SourceLineCache+20 < SourceLineCacheEnd) { 341 if (SourceLineCache[20] > QueriedFilePos) 342 SourceLineCacheEnd = SourceLineCache+20; 343 } 344 } 345 } 346 } else { 347 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 348 } 349 } 350 351 // If the spread is large, do a "radix" test as our initial guess, based on 352 // the assumption that lines average to approximately the same length. 353 // NOTE: This is currently disabled, as it does not appear to be profitable in 354 // initial measurements. 355 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 356 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 357 358 // Take a stab at guessing where it is. 359 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 360 361 // Check for -10 and +10 lines. 362 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 363 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 364 365 // If the computed lower bound is less than the query location, move it in. 366 if (SourceLineCache < SourceLineCacheStart+LowerBound && 367 SourceLineCacheStart[LowerBound] < QueriedFilePos) 368 SourceLineCache = SourceLineCacheStart+LowerBound; 369 370 // If the computed upper bound is greater than the query location, move it. 371 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 372 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 373 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 374 } 375 376 unsigned *Pos 377 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 378 unsigned LineNo = Pos-SourceLineCacheStart; 379 380 LastLineNoFileIDQuery = FileID; 381 LastLineNoContentCache = Content; 382 LastLineNoFilePos = QueriedFilePos; 383 LastLineNoResult = LineNo; 384 return LineNo; 385 } 386 387 /// PrintStats - Print statistics to stderr. 388 /// 389 void SourceManager::PrintStats() const { 390 llvm::cerr << "\n*** Source Manager Stats:\n"; 391 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 392 << " mem buffers mapped, " << FileIDs.size() 393 << " file ID's allocated.\n"; 394 llvm::cerr << " " << FileIDs.size() << " normal buffer FileID's, " 395 << MacroIDs.size() << " macro expansion FileID's.\n"; 396 397 unsigned NumLineNumsComputed = 0; 398 unsigned NumFileBytesMapped = 0; 399 for (std::set<ContentCache>::const_iterator I = 400 FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 401 NumLineNumsComputed += I->SourceLineCache != 0; 402 NumFileBytesMapped += I->Buffer->getBufferSize(); 403 } 404 405 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 406 << NumLineNumsComputed << " files with line #'s computed.\n"; 407 } 408 409 //===----------------------------------------------------------------------===// 410 // Serialization. 411 //===----------------------------------------------------------------------===// 412 413 void ContentCache::Emit(llvm::Serializer& S) const { 414 S.FlushRecord(); 415 S.EmitPtr(this); 416 417 if (Entry) { 418 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 419 420 if (Fname.isAbsolute()) 421 S.EmitCStr(Fname.c_str()); 422 else { 423 // Create an absolute path. 424 // FIXME: This will potentially contain ".." and "." in the path. 425 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 426 path.appendComponent(Fname.c_str()); 427 S.EmitCStr(path.c_str()); 428 } 429 } 430 else { 431 const char* p = Buffer->getBufferStart(); 432 const char* e = Buffer->getBufferEnd(); 433 434 S.EmitInt(e-p); 435 436 for ( ; p != e; ++p) 437 S.EmitInt(*p); 438 } 439 440 S.FlushRecord(); 441 } 442 443 void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 444 SourceManager& SMgr, 445 FileManager* FMgr, 446 std::vector<char>& Buf) { 447 if (FMgr) { 448 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 449 D.ReadCStr(Buf,false); 450 451 // Create/fetch the FileEntry. 452 const char* start = &Buf[0]; 453 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 454 455 // FIXME: Ideally we want a lazy materialization of the ContentCache 456 // anyway, because we don't want to read in source files unless this 457 // is absolutely needed. 458 if (!E) 459 D.RegisterPtr(PtrID,NULL); 460 else 461 // Get the ContextCache object and register it with the deserializer. 462 D.RegisterPtr(PtrID,SMgr.getContentCache(E)); 463 } 464 else { 465 // Register the ContextCache object with the deserializer. 466 SMgr.MemBufferInfos.push_back(ContentCache()); 467 ContentCache& Entry = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 468 D.RegisterPtr(&Entry); 469 470 // Create the buffer. 471 unsigned Size = D.ReadInt(); 472 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 473 474 // Read the contents of the buffer. 475 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 476 for (unsigned i = 0; i < Size ; ++i) 477 p[i] = D.ReadInt(); 478 } 479 } 480 481 void FileIDInfo::Emit(llvm::Serializer& S) const { 482 S.Emit(IncludeLoc); 483 S.EmitInt(ChunkNo); 484 S.EmitPtr(Content); 485 } 486 487 FileIDInfo FileIDInfo::ReadVal(llvm::Deserializer& D) { 488 FileIDInfo I; 489 I.IncludeLoc = SourceLocation::ReadVal(D); 490 I.ChunkNo = D.ReadInt(); 491 D.ReadPtr(I.Content,false); 492 return I; 493 } 494 495 void MacroIDInfo::Emit(llvm::Serializer& S) const { 496 S.Emit(VirtualLoc); 497 S.Emit(PhysicalLoc); 498 } 499 500 MacroIDInfo MacroIDInfo::ReadVal(llvm::Deserializer& D) { 501 MacroIDInfo I; 502 I.VirtualLoc = SourceLocation::ReadVal(D); 503 I.PhysicalLoc = SourceLocation::ReadVal(D); 504 return I; 505 } 506 507 void SourceManager::Emit(llvm::Serializer& S) const { 508 S.EnterBlock(); 509 S.EmitPtr(this); 510 S.EmitInt(MainFileID); 511 512 // Emit: FileInfos. Just emit the file name. 513 S.EnterBlock(); 514 515 std::for_each(FileInfos.begin(),FileInfos.end(), 516 S.MakeEmitter<ContentCache>()); 517 518 S.ExitBlock(); 519 520 // Emit: MemBufferInfos 521 S.EnterBlock(); 522 523 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 524 S.MakeEmitter<ContentCache>()); 525 526 S.ExitBlock(); 527 528 // Emit: FileIDs 529 S.EmitInt(FileIDs.size()); 530 std::for_each(FileIDs.begin(), FileIDs.end(), S.MakeEmitter<FileIDInfo>()); 531 532 // Emit: MacroIDs 533 S.EmitInt(MacroIDs.size()); 534 std::for_each(MacroIDs.begin(), MacroIDs.end(), S.MakeEmitter<MacroIDInfo>()); 535 536 S.ExitBlock(); 537 } 538 539 SourceManager* 540 SourceManager::CreateAndRegister(llvm::Deserializer& D, FileManager& FMgr){ 541 SourceManager *M = new SourceManager(); 542 D.RegisterPtr(M); 543 544 // Read: the FileID of the main source file of the translation unit. 545 M->MainFileID = D.ReadInt(); 546 547 std::vector<char> Buf; 548 549 { // Read: FileInfos. 550 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 551 while (!D.FinishedBlock(BLoc)) 552 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 553 } 554 555 { // Read: MemBufferInfos. 556 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 557 while (!D.FinishedBlock(BLoc)) 558 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 559 } 560 561 // Read: FileIDs. 562 unsigned Size = D.ReadInt(); 563 M->FileIDs.reserve(Size); 564 for (; Size > 0 ; --Size) 565 M->FileIDs.push_back(FileIDInfo::ReadVal(D)); 566 567 // Read: MacroIDs. 568 Size = D.ReadInt(); 569 M->MacroIDs.reserve(Size); 570 for (; Size > 0 ; --Size) 571 M->MacroIDs.push_back(MacroIDInfo::ReadVal(D)); 572 573 return M; 574 } 575