1 //===--- SourceManager.cpp - Track and cache source files -----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the SourceManager interface. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Basic/SourceManager.h" 15 #include "clang/Basic/FileManager.h" 16 #include "llvm/Support/Compiler.h" 17 #include "llvm/Support/MemoryBuffer.h" 18 #include "llvm/System/Path.h" 19 #include "llvm/Bitcode/Serialize.h" 20 #include "llvm/Bitcode/Deserialize.h" 21 #include "llvm/Support/Streams.h" 22 #include <algorithm> 23 using namespace clang; 24 using namespace SrcMgr; 25 using llvm::MemoryBuffer; 26 27 //===----------------------------------------------------------------------===// 28 // SourceManager Helper Classes 29 //===----------------------------------------------------------------------===// 30 31 ContentCache::~ContentCache() { 32 delete Buffer; 33 } 34 35 /// getSizeBytesMapped - Returns the number of bytes actually mapped for 36 /// this ContentCache. This can be 0 if the MemBuffer was not actually 37 /// instantiated. 38 unsigned ContentCache::getSizeBytesMapped() const { 39 return Buffer ? Buffer->getBufferSize() : 0; 40 } 41 42 /// getSize - Returns the size of the content encapsulated by this ContentCache. 43 /// This can be the size of the source file or the size of an arbitrary 44 /// scratch buffer. If the ContentCache encapsulates a source file, that 45 /// file is not lazily brought in from disk to satisfy this query. 46 unsigned ContentCache::getSize() const { 47 return Entry ? Entry->getSize() : Buffer->getBufferSize(); 48 } 49 50 const llvm::MemoryBuffer *ContentCache::getBuffer() const { 51 // Lazily create the Buffer for ContentCaches that wrap files. 52 if (!Buffer && Entry) { 53 // FIXME: Should we support a way to not have to do this check over 54 // and over if we cannot open the file? 55 Buffer = MemoryBuffer::getFile(Entry->getName(), 0, Entry->getSize()); 56 } 57 return Buffer; 58 } 59 60 //===----------------------------------------------------------------------===// 61 // Line Table Implementation 62 //===----------------------------------------------------------------------===// 63 64 namespace clang { 65 struct LineEntry { 66 /// FileOffset - The offset in this file that the line entry occurs at. 67 unsigned FileOffset; 68 69 /// LineNo - The presumed line number of this line entry: #line 4. 70 unsigned LineNo; 71 72 /// FilenameID - The ID of the filename identified by this line entry: 73 /// #line 4 "foo.c". This is -1 if not specified. 74 int FilenameID; 75 76 /// Flags - Set the 0 if no flags, 1 if a system header, 77 SrcMgr::CharacteristicKind FileKind; 78 79 /// IncludeOffset - This is the offset of the virtual include stack location, 80 /// which is manipulated by GNU linemarker directives. If this is 0 then 81 /// there is no virtual #includer. 82 unsigned IncludeOffset; 83 84 static LineEntry get(unsigned Offs, unsigned Line, int Filename, 85 SrcMgr::CharacteristicKind FileKind, 86 unsigned IncludeOffset) { 87 LineEntry E; 88 E.FileOffset = Offs; 89 E.LineNo = Line; 90 E.FilenameID = Filename; 91 E.FileKind = FileKind; 92 E.IncludeOffset = IncludeOffset; 93 return E; 94 } 95 }; 96 97 // needed for FindNearestLineEntry (upper_bound of LineEntry) 98 inline bool operator<(const LineEntry &lhs, const LineEntry &rhs) { 99 // FIXME: should check the other field? 100 return lhs.FileOffset < rhs.FileOffset; 101 } 102 103 inline bool operator<(const LineEntry &E, unsigned Offset) { 104 return E.FileOffset < Offset; 105 } 106 107 inline bool operator<(unsigned Offset, const LineEntry &E) { 108 return Offset < E.FileOffset; 109 } 110 111 /// LineTableInfo - This class is used to hold and unique data used to 112 /// represent #line information. 113 class LineTableInfo { 114 /// FilenameIDs - This map is used to assign unique IDs to filenames in 115 /// #line directives. This allows us to unique the filenames that 116 /// frequently reoccur and reference them with indices. FilenameIDs holds 117 /// the mapping from string -> ID, and FilenamesByID holds the mapping of ID 118 /// to string. 119 llvm::StringMap<unsigned, llvm::BumpPtrAllocator> FilenameIDs; 120 std::vector<llvm::StringMapEntry<unsigned>*> FilenamesByID; 121 122 /// LineEntries - This is a map from FileIDs to a list of line entries (sorted 123 /// by the offset they occur in the file. 124 std::map<unsigned, std::vector<LineEntry> > LineEntries; 125 public: 126 LineTableInfo() { 127 } 128 129 void clear() { 130 FilenameIDs.clear(); 131 FilenamesByID.clear(); 132 } 133 134 ~LineTableInfo() {} 135 136 unsigned getLineTableFilenameID(const char *Ptr, unsigned Len); 137 const char *getFilename(unsigned ID) const { 138 assert(ID < FilenamesByID.size() && "Invalid FilenameID"); 139 return FilenamesByID[ID]->getKeyData(); 140 } 141 142 void AddLineNote(unsigned FID, unsigned Offset, 143 unsigned LineNo, int FilenameID); 144 void AddLineNote(unsigned FID, unsigned Offset, 145 unsigned LineNo, int FilenameID, 146 unsigned EntryExit, SrcMgr::CharacteristicKind FileKind); 147 148 149 /// FindNearestLineEntry - Find the line entry nearest to FID that is before 150 /// it. If there is no line entry before Offset in FID, return null. 151 const LineEntry *FindNearestLineEntry(unsigned FID, unsigned Offset); 152 }; 153 } // namespace clang 154 155 unsigned LineTableInfo::getLineTableFilenameID(const char *Ptr, unsigned Len) { 156 // Look up the filename in the string table, returning the pre-existing value 157 // if it exists. 158 llvm::StringMapEntry<unsigned> &Entry = 159 FilenameIDs.GetOrCreateValue(Ptr, Ptr+Len, ~0U); 160 if (Entry.getValue() != ~0U) 161 return Entry.getValue(); 162 163 // Otherwise, assign this the next available ID. 164 Entry.setValue(FilenamesByID.size()); 165 FilenamesByID.push_back(&Entry); 166 return FilenamesByID.size()-1; 167 } 168 169 /// AddLineNote - Add a line note to the line table that indicates that there 170 /// is a #line at the specified FID/Offset location which changes the presumed 171 /// location to LineNo/FilenameID. 172 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset, 173 unsigned LineNo, int FilenameID) { 174 std::vector<LineEntry> &Entries = LineEntries[FID]; 175 176 assert((Entries.empty() || Entries.back().FileOffset < Offset) && 177 "Adding line entries out of order!"); 178 179 SrcMgr::CharacteristicKind Kind = SrcMgr::C_User; 180 unsigned IncludeOffset = 0; 181 182 if (!Entries.empty()) { 183 // If this is a '#line 4' after '#line 42 "foo.h"', make sure to remember 184 // that we are still in "foo.h". 185 if (FilenameID == -1) 186 FilenameID = Entries.back().FilenameID; 187 188 // If we are after a line marker that switched us to system header mode, or 189 // that set #include information, preserve it. 190 Kind = Entries.back().FileKind; 191 IncludeOffset = Entries.back().IncludeOffset; 192 } 193 194 Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, Kind, 195 IncludeOffset)); 196 } 197 198 /// AddLineNote This is the same as the previous version of AddLineNote, but is 199 /// used for GNU line markers. If EntryExit is 0, then this doesn't change the 200 /// presumed #include stack. If it is 1, this is a file entry, if it is 2 then 201 /// this is a file exit. FileKind specifies whether this is a system header or 202 /// extern C system header. 203 void LineTableInfo::AddLineNote(unsigned FID, unsigned Offset, 204 unsigned LineNo, int FilenameID, 205 unsigned EntryExit, 206 SrcMgr::CharacteristicKind FileKind) { 207 assert(FilenameID != -1 && "Unspecified filename should use other accessor"); 208 209 std::vector<LineEntry> &Entries = LineEntries[FID]; 210 211 assert((Entries.empty() || Entries.back().FileOffset < Offset) && 212 "Adding line entries out of order!"); 213 214 unsigned IncludeOffset = 0; 215 if (EntryExit == 0) { // No #include stack change. 216 IncludeOffset = Entries.empty() ? 0 : Entries.back().IncludeOffset; 217 } else if (EntryExit == 1) { 218 IncludeOffset = Offset-1; 219 } else if (EntryExit == 2) { 220 assert(!Entries.empty() && Entries.back().IncludeOffset && 221 "PPDirectives should have caught case when popping empty include stack"); 222 223 // Get the include loc of the last entries' include loc as our include loc. 224 IncludeOffset = 0; 225 if (const LineEntry *PrevEntry = 226 FindNearestLineEntry(FID, Entries.back().IncludeOffset)) 227 IncludeOffset = PrevEntry->IncludeOffset; 228 } 229 230 Entries.push_back(LineEntry::get(Offset, LineNo, FilenameID, FileKind, 231 IncludeOffset)); 232 } 233 234 235 /// FindNearestLineEntry - Find the line entry nearest to FID that is before 236 /// it. If there is no line entry before Offset in FID, return null. 237 const LineEntry *LineTableInfo::FindNearestLineEntry(unsigned FID, 238 unsigned Offset) { 239 const std::vector<LineEntry> &Entries = LineEntries[FID]; 240 assert(!Entries.empty() && "No #line entries for this FID after all!"); 241 242 // It is very common for the query to be after the last #line, check this 243 // first. 244 if (Entries.back().FileOffset <= Offset) 245 return &Entries.back(); 246 247 // Do a binary search to find the maximal element that is still before Offset. 248 std::vector<LineEntry>::const_iterator I = 249 std::upper_bound(Entries.begin(), Entries.end(), Offset); 250 if (I == Entries.begin()) return 0; 251 return &*--I; 252 } 253 254 255 /// getLineTableFilenameID - Return the uniqued ID for the specified filename. 256 /// 257 unsigned SourceManager::getLineTableFilenameID(const char *Ptr, unsigned Len) { 258 if (LineTable == 0) 259 LineTable = new LineTableInfo(); 260 return LineTable->getLineTableFilenameID(Ptr, Len); 261 } 262 263 264 /// AddLineNote - Add a line note to the line table for the FileID and offset 265 /// specified by Loc. If FilenameID is -1, it is considered to be 266 /// unspecified. 267 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, 268 int FilenameID) { 269 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 270 271 const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile(); 272 273 // Remember that this file has #line directives now if it doesn't already. 274 const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives(); 275 276 if (LineTable == 0) 277 LineTable = new LineTableInfo(); 278 LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID); 279 } 280 281 /// AddLineNote - Add a GNU line marker to the line table. 282 void SourceManager::AddLineNote(SourceLocation Loc, unsigned LineNo, 283 int FilenameID, bool IsFileEntry, 284 bool IsFileExit, bool IsSystemHeader, 285 bool IsExternCHeader) { 286 // If there is no filename and no flags, this is treated just like a #line, 287 // which does not change the flags of the previous line marker. 288 if (FilenameID == -1) { 289 assert(!IsFileEntry && !IsFileExit && !IsSystemHeader && !IsExternCHeader && 290 "Can't set flags without setting the filename!"); 291 return AddLineNote(Loc, LineNo, FilenameID); 292 } 293 294 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 295 const SrcMgr::FileInfo &FileInfo = getSLocEntry(LocInfo.first).getFile(); 296 297 // Remember that this file has #line directives now if it doesn't already. 298 const_cast<SrcMgr::FileInfo&>(FileInfo).setHasLineDirectives(); 299 300 if (LineTable == 0) 301 LineTable = new LineTableInfo(); 302 303 SrcMgr::CharacteristicKind FileKind; 304 if (IsExternCHeader) 305 FileKind = SrcMgr::C_ExternCSystem; 306 else if (IsSystemHeader) 307 FileKind = SrcMgr::C_System; 308 else 309 FileKind = SrcMgr::C_User; 310 311 unsigned EntryExit = 0; 312 if (IsFileEntry) 313 EntryExit = 1; 314 else if (IsFileExit) 315 EntryExit = 2; 316 317 LineTable->AddLineNote(LocInfo.first.ID, LocInfo.second, LineNo, FilenameID, 318 EntryExit, FileKind); 319 } 320 321 322 //===----------------------------------------------------------------------===// 323 // Private 'Create' methods. 324 //===----------------------------------------------------------------------===// 325 326 SourceManager::~SourceManager() { 327 delete LineTable; 328 329 // Delete FileEntry objects corresponding to content caches. Since the actual 330 // content cache objects are bump pointer allocated, we just have to run the 331 // dtors, but we call the deallocate method for completeness. 332 for (unsigned i = 0, e = MemBufferInfos.size(); i != e; ++i) { 333 MemBufferInfos[i]->~ContentCache(); 334 ContentCacheAlloc.Deallocate(MemBufferInfos[i]); 335 } 336 for (llvm::DenseMap<const FileEntry*, SrcMgr::ContentCache*>::iterator 337 I = FileInfos.begin(), E = FileInfos.end(); I != E; ++I) { 338 I->second->~ContentCache(); 339 ContentCacheAlloc.Deallocate(I->second); 340 } 341 } 342 343 void SourceManager::clearIDTables() { 344 MainFileID = FileID(); 345 SLocEntryTable.clear(); 346 LastLineNoFileIDQuery = FileID(); 347 LastLineNoContentCache = 0; 348 LastFileIDLookup = FileID(); 349 350 if (LineTable) 351 LineTable->clear(); 352 353 // Use up FileID #0 as an invalid instantiation. 354 NextOffset = 0; 355 createInstantiationLoc(SourceLocation(),SourceLocation(),SourceLocation(), 1); 356 } 357 358 /// getOrCreateContentCache - Create or return a cached ContentCache for the 359 /// specified file. 360 const ContentCache * 361 SourceManager::getOrCreateContentCache(const FileEntry *FileEnt) { 362 assert(FileEnt && "Didn't specify a file entry to use?"); 363 364 // Do we already have information about this file? 365 ContentCache *&Entry = FileInfos[FileEnt]; 366 if (Entry) return Entry; 367 368 // Nope, create a new Cache entry. Make sure it is at least 8-byte aligned 369 // so that FileInfo can use the low 3 bits of the pointer for its own 370 // nefarious purposes. 371 unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment; 372 EntryAlign = std::max(8U, EntryAlign); 373 Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign); 374 new (Entry) ContentCache(FileEnt); 375 return Entry; 376 } 377 378 379 /// createMemBufferContentCache - Create a new ContentCache for the specified 380 /// memory buffer. This does no caching. 381 const ContentCache* 382 SourceManager::createMemBufferContentCache(const MemoryBuffer *Buffer) { 383 // Add a new ContentCache to the MemBufferInfos list and return it. Make sure 384 // it is at least 8-byte aligned so that FileInfo can use the low 3 bits of 385 // the pointer for its own nefarious purposes. 386 unsigned EntryAlign = llvm::AlignOf<ContentCache>::Alignment; 387 EntryAlign = std::max(8U, EntryAlign); 388 ContentCache *Entry = ContentCacheAlloc.Allocate<ContentCache>(1, EntryAlign); 389 new (Entry) ContentCache(); 390 MemBufferInfos.push_back(Entry); 391 Entry->setBuffer(Buffer); 392 return Entry; 393 } 394 395 //===----------------------------------------------------------------------===// 396 // Methods to create new FileID's and instantiations. 397 //===----------------------------------------------------------------------===// 398 399 /// createFileID - Create a new fileID for the specified ContentCache and 400 /// include position. This works regardless of whether the ContentCache 401 /// corresponds to a file or some other input source. 402 FileID SourceManager::createFileID(const ContentCache *File, 403 SourceLocation IncludePos, 404 SrcMgr::CharacteristicKind FileCharacter) { 405 SLocEntryTable.push_back(SLocEntry::get(NextOffset, 406 FileInfo::get(IncludePos, File, 407 FileCharacter))); 408 unsigned FileSize = File->getSize(); 409 assert(NextOffset+FileSize+1 > NextOffset && "Ran out of source locations!"); 410 NextOffset += FileSize+1; 411 412 // Set LastFileIDLookup to the newly created file. The next getFileID call is 413 // almost guaranteed to be from that file. 414 return LastFileIDLookup = FileID::get(SLocEntryTable.size()-1); 415 } 416 417 /// createInstantiationLoc - Return a new SourceLocation that encodes the fact 418 /// that a token from SpellingLoc should actually be referenced from 419 /// InstantiationLoc. 420 SourceLocation SourceManager::createInstantiationLoc(SourceLocation SpellingLoc, 421 SourceLocation ILocStart, 422 SourceLocation ILocEnd, 423 unsigned TokLength) { 424 InstantiationInfo II = InstantiationInfo::get(ILocStart,ILocEnd, SpellingLoc); 425 SLocEntryTable.push_back(SLocEntry::get(NextOffset, II)); 426 assert(NextOffset+TokLength+1 > NextOffset && "Ran out of source locations!"); 427 NextOffset += TokLength+1; 428 return SourceLocation::getMacroLoc(NextOffset-(TokLength+1)); 429 } 430 431 /// getBufferData - Return a pointer to the start and end of the source buffer 432 /// data for the specified FileID. 433 std::pair<const char*, const char*> 434 SourceManager::getBufferData(FileID FID) const { 435 const llvm::MemoryBuffer *Buf = getBuffer(FID); 436 return std::make_pair(Buf->getBufferStart(), Buf->getBufferEnd()); 437 } 438 439 440 //===----------------------------------------------------------------------===// 441 // SourceLocation manipulation methods. 442 //===----------------------------------------------------------------------===// 443 444 /// getFileIDSlow - Return the FileID for a SourceLocation. This is a very hot 445 /// method that is used for all SourceManager queries that start with a 446 /// SourceLocation object. It is responsible for finding the entry in 447 /// SLocEntryTable which contains the specified location. 448 /// 449 FileID SourceManager::getFileIDSlow(unsigned SLocOffset) const { 450 assert(SLocOffset && "Invalid FileID"); 451 452 // After the first and second level caches, I see two common sorts of 453 // behavior: 1) a lot of searched FileID's are "near" the cached file location 454 // or are "near" the cached instantiation location. 2) others are just 455 // completely random and may be a very long way away. 456 // 457 // To handle this, we do a linear search for up to 8 steps to catch #1 quickly 458 // then we fall back to a less cache efficient, but more scalable, binary 459 // search to find the location. 460 461 // See if this is near the file point - worst case we start scanning from the 462 // most newly created FileID. 463 std::vector<SrcMgr::SLocEntry>::const_iterator I; 464 465 if (SLocEntryTable[LastFileIDLookup.ID].getOffset() < SLocOffset) { 466 // Neither loc prunes our search. 467 I = SLocEntryTable.end(); 468 } else { 469 // Perhaps it is near the file point. 470 I = SLocEntryTable.begin()+LastFileIDLookup.ID; 471 } 472 473 // Find the FileID that contains this. "I" is an iterator that points to a 474 // FileID whose offset is known to be larger than SLocOffset. 475 unsigned NumProbes = 0; 476 while (1) { 477 --I; 478 if (I->getOffset() <= SLocOffset) { 479 #if 0 480 printf("lin %d -> %d [%s] %d %d\n", SLocOffset, 481 I-SLocEntryTable.begin(), 482 I->isInstantiation() ? "inst" : "file", 483 LastFileIDLookup.ID, int(SLocEntryTable.end()-I)); 484 #endif 485 FileID Res = FileID::get(I-SLocEntryTable.begin()); 486 487 // If this isn't an instantiation, remember it. We have good locality 488 // across FileID lookups. 489 if (!I->isInstantiation()) 490 LastFileIDLookup = Res; 491 NumLinearScans += NumProbes+1; 492 return Res; 493 } 494 if (++NumProbes == 8) 495 break; 496 } 497 498 // Convert "I" back into an index. We know that it is an entry whose index is 499 // larger than the offset we are looking for. 500 unsigned GreaterIndex = I-SLocEntryTable.begin(); 501 // LessIndex - This is the lower bound of the range that we're searching. 502 // We know that the offset corresponding to the FileID is is less than 503 // SLocOffset. 504 unsigned LessIndex = 0; 505 NumProbes = 0; 506 while (1) { 507 unsigned MiddleIndex = (GreaterIndex-LessIndex)/2+LessIndex; 508 unsigned MidOffset = SLocEntryTable[MiddleIndex].getOffset(); 509 510 ++NumProbes; 511 512 // If the offset of the midpoint is too large, chop the high side of the 513 // range to the midpoint. 514 if (MidOffset > SLocOffset) { 515 GreaterIndex = MiddleIndex; 516 continue; 517 } 518 519 // If the middle index contains the value, succeed and return. 520 if (isOffsetInFileID(FileID::get(MiddleIndex), SLocOffset)) { 521 #if 0 522 printf("bin %d -> %d [%s] %d %d\n", SLocOffset, 523 I-SLocEntryTable.begin(), 524 I->isInstantiation() ? "inst" : "file", 525 LastFileIDLookup.ID, int(SLocEntryTable.end()-I)); 526 #endif 527 FileID Res = FileID::get(MiddleIndex); 528 529 // If this isn't an instantiation, remember it. We have good locality 530 // across FileID lookups. 531 if (!I->isInstantiation()) 532 LastFileIDLookup = Res; 533 NumBinaryProbes += NumProbes; 534 return Res; 535 } 536 537 // Otherwise, move the low-side up to the middle index. 538 LessIndex = MiddleIndex; 539 } 540 } 541 542 SourceLocation SourceManager:: 543 getInstantiationLocSlowCase(SourceLocation Loc) const { 544 do { 545 std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); 546 Loc = getSLocEntry(LocInfo.first).getInstantiation() 547 .getInstantiationLocStart(); 548 Loc = Loc.getFileLocWithOffset(LocInfo.second); 549 } while (!Loc.isFileID()); 550 551 return Loc; 552 } 553 554 SourceLocation SourceManager::getSpellingLocSlowCase(SourceLocation Loc) const { 555 do { 556 std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); 557 Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc(); 558 Loc = Loc.getFileLocWithOffset(LocInfo.second); 559 } while (!Loc.isFileID()); 560 return Loc; 561 } 562 563 564 std::pair<FileID, unsigned> 565 SourceManager::getDecomposedInstantiationLocSlowCase(const SrcMgr::SLocEntry *E, 566 unsigned Offset) const { 567 // If this is an instantiation record, walk through all the instantiation 568 // points. 569 FileID FID; 570 SourceLocation Loc; 571 do { 572 Loc = E->getInstantiation().getInstantiationLocStart(); 573 574 FID = getFileID(Loc); 575 E = &getSLocEntry(FID); 576 Offset += Loc.getOffset()-E->getOffset(); 577 } while (!Loc.isFileID()); 578 579 return std::make_pair(FID, Offset); 580 } 581 582 std::pair<FileID, unsigned> 583 SourceManager::getDecomposedSpellingLocSlowCase(const SrcMgr::SLocEntry *E, 584 unsigned Offset) const { 585 // If this is an instantiation record, walk through all the instantiation 586 // points. 587 FileID FID; 588 SourceLocation Loc; 589 do { 590 Loc = E->getInstantiation().getSpellingLoc(); 591 592 FID = getFileID(Loc); 593 E = &getSLocEntry(FID); 594 Offset += Loc.getOffset()-E->getOffset(); 595 } while (!Loc.isFileID()); 596 597 return std::make_pair(FID, Offset); 598 } 599 600 /// getImmediateSpellingLoc - Given a SourceLocation object, return the 601 /// spelling location referenced by the ID. This is the first level down 602 /// towards the place where the characters that make up the lexed token can be 603 /// found. This should not generally be used by clients. 604 SourceLocation SourceManager::getImmediateSpellingLoc(SourceLocation Loc) const{ 605 if (Loc.isFileID()) return Loc; 606 std::pair<FileID, unsigned> LocInfo = getDecomposedLoc(Loc); 607 Loc = getSLocEntry(LocInfo.first).getInstantiation().getSpellingLoc(); 608 return Loc.getFileLocWithOffset(LocInfo.second); 609 } 610 611 612 /// getImmediateInstantiationRange - Loc is required to be an instantiation 613 /// location. Return the start/end of the instantiation information. 614 std::pair<SourceLocation,SourceLocation> 615 SourceManager::getImmediateInstantiationRange(SourceLocation Loc) const { 616 assert(Loc.isMacroID() && "Not an instantiation loc!"); 617 const InstantiationInfo &II = getSLocEntry(getFileID(Loc)).getInstantiation(); 618 return II.getInstantiationLocRange(); 619 } 620 621 /// getInstantiationRange - Given a SourceLocation object, return the 622 /// range of tokens covered by the instantiation in the ultimate file. 623 std::pair<SourceLocation,SourceLocation> 624 SourceManager::getInstantiationRange(SourceLocation Loc) const { 625 if (Loc.isFileID()) return std::make_pair(Loc, Loc); 626 627 std::pair<SourceLocation,SourceLocation> Res = 628 getImmediateInstantiationRange(Loc); 629 630 // Fully resolve the start and end locations to their ultimate instantiation 631 // points. 632 while (!Res.first.isFileID()) 633 Res.first = getImmediateInstantiationRange(Res.first).first; 634 while (!Res.second.isFileID()) 635 Res.second = getImmediateInstantiationRange(Res.second).second; 636 return Res; 637 } 638 639 640 641 //===----------------------------------------------------------------------===// 642 // Queries about the code at a SourceLocation. 643 //===----------------------------------------------------------------------===// 644 645 /// getCharacterData - Return a pointer to the start of the specified location 646 /// in the appropriate MemoryBuffer. 647 const char *SourceManager::getCharacterData(SourceLocation SL) const { 648 // Note that this is a hot function in the getSpelling() path, which is 649 // heavily used by -E mode. 650 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(SL); 651 652 // Note that calling 'getBuffer()' may lazily page in a source file. 653 return getSLocEntry(LocInfo.first).getFile().getContentCache() 654 ->getBuffer()->getBufferStart() + LocInfo.second; 655 } 656 657 658 /// getColumnNumber - Return the column # for the specified file position. 659 /// this is significantly cheaper to compute than the line number. 660 unsigned SourceManager::getColumnNumber(FileID FID, unsigned FilePos) const { 661 const char *Buf = getBuffer(FID)->getBufferStart(); 662 663 unsigned LineStart = FilePos; 664 while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r') 665 --LineStart; 666 return FilePos-LineStart+1; 667 } 668 669 unsigned SourceManager::getSpellingColumnNumber(SourceLocation Loc) const { 670 if (Loc.isInvalid()) return 0; 671 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc); 672 return getColumnNumber(LocInfo.first, LocInfo.second); 673 } 674 675 unsigned SourceManager::getInstantiationColumnNumber(SourceLocation Loc) const { 676 if (Loc.isInvalid()) return 0; 677 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 678 return getColumnNumber(LocInfo.first, LocInfo.second); 679 } 680 681 682 683 static void ComputeLineNumbers(ContentCache* FI, 684 llvm::BumpPtrAllocator &Alloc) DISABLE_INLINE; 685 static void ComputeLineNumbers(ContentCache* FI, llvm::BumpPtrAllocator &Alloc){ 686 // Note that calling 'getBuffer()' may lazily page in the file. 687 const MemoryBuffer *Buffer = FI->getBuffer(); 688 689 // Find the file offsets of all of the *physical* source lines. This does 690 // not look at trigraphs, escaped newlines, or anything else tricky. 691 std::vector<unsigned> LineOffsets; 692 693 // Line #1 starts at char 0. 694 LineOffsets.push_back(0); 695 696 const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart(); 697 const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd(); 698 unsigned Offs = 0; 699 while (1) { 700 // Skip over the contents of the line. 701 // TODO: Vectorize this? This is very performance sensitive for programs 702 // with lots of diagnostics and in -E mode. 703 const unsigned char *NextBuf = (const unsigned char *)Buf; 704 while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0') 705 ++NextBuf; 706 Offs += NextBuf-Buf; 707 Buf = NextBuf; 708 709 if (Buf[0] == '\n' || Buf[0] == '\r') { 710 // If this is \n\r or \r\n, skip both characters. 711 if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1]) 712 ++Offs, ++Buf; 713 ++Offs, ++Buf; 714 LineOffsets.push_back(Offs); 715 } else { 716 // Otherwise, this is a null. If end of file, exit. 717 if (Buf == End) break; 718 // Otherwise, skip the null. 719 ++Offs, ++Buf; 720 } 721 } 722 723 // Copy the offsets into the FileInfo structure. 724 FI->NumLines = LineOffsets.size(); 725 FI->SourceLineCache = Alloc.Allocate<unsigned>(LineOffsets.size()); 726 std::copy(LineOffsets.begin(), LineOffsets.end(), FI->SourceLineCache); 727 } 728 729 /// getLineNumber - Given a SourceLocation, return the spelling line number 730 /// for the position indicated. This requires building and caching a table of 731 /// line offsets for the MemoryBuffer, so this is not cheap: use only when 732 /// about to emit a diagnostic. 733 unsigned SourceManager::getLineNumber(FileID FID, unsigned FilePos) const { 734 ContentCache *Content; 735 if (LastLineNoFileIDQuery == FID) 736 Content = LastLineNoContentCache; 737 else 738 Content = const_cast<ContentCache*>(getSLocEntry(FID) 739 .getFile().getContentCache()); 740 741 // If this is the first use of line information for this buffer, compute the 742 /// SourceLineCache for it on demand. 743 if (Content->SourceLineCache == 0) 744 ComputeLineNumbers(Content, ContentCacheAlloc); 745 746 // Okay, we know we have a line number table. Do a binary search to find the 747 // line number that this character position lands on. 748 unsigned *SourceLineCache = Content->SourceLineCache; 749 unsigned *SourceLineCacheStart = SourceLineCache; 750 unsigned *SourceLineCacheEnd = SourceLineCache + Content->NumLines; 751 752 unsigned QueriedFilePos = FilePos+1; 753 754 // If the previous query was to the same file, we know both the file pos from 755 // that query and the line number returned. This allows us to narrow the 756 // search space from the entire file to something near the match. 757 if (LastLineNoFileIDQuery == FID) { 758 if (QueriedFilePos >= LastLineNoFilePos) { 759 SourceLineCache = SourceLineCache+LastLineNoResult-1; 760 761 // The query is likely to be nearby the previous one. Here we check to 762 // see if it is within 5, 10 or 20 lines. It can be far away in cases 763 // where big comment blocks and vertical whitespace eat up lines but 764 // contribute no tokens. 765 if (SourceLineCache+5 < SourceLineCacheEnd) { 766 if (SourceLineCache[5] > QueriedFilePos) 767 SourceLineCacheEnd = SourceLineCache+5; 768 else if (SourceLineCache+10 < SourceLineCacheEnd) { 769 if (SourceLineCache[10] > QueriedFilePos) 770 SourceLineCacheEnd = SourceLineCache+10; 771 else if (SourceLineCache+20 < SourceLineCacheEnd) { 772 if (SourceLineCache[20] > QueriedFilePos) 773 SourceLineCacheEnd = SourceLineCache+20; 774 } 775 } 776 } 777 } else { 778 SourceLineCacheEnd = SourceLineCache+LastLineNoResult+1; 779 } 780 } 781 782 // If the spread is large, do a "radix" test as our initial guess, based on 783 // the assumption that lines average to approximately the same length. 784 // NOTE: This is currently disabled, as it does not appear to be profitable in 785 // initial measurements. 786 if (0 && SourceLineCacheEnd-SourceLineCache > 20) { 787 unsigned FileLen = Content->SourceLineCache[Content->NumLines-1]; 788 789 // Take a stab at guessing where it is. 790 unsigned ApproxPos = Content->NumLines*QueriedFilePos / FileLen; 791 792 // Check for -10 and +10 lines. 793 unsigned LowerBound = std::max(int(ApproxPos-10), 0); 794 unsigned UpperBound = std::min(ApproxPos+10, FileLen); 795 796 // If the computed lower bound is less than the query location, move it in. 797 if (SourceLineCache < SourceLineCacheStart+LowerBound && 798 SourceLineCacheStart[LowerBound] < QueriedFilePos) 799 SourceLineCache = SourceLineCacheStart+LowerBound; 800 801 // If the computed upper bound is greater than the query location, move it. 802 if (SourceLineCacheEnd > SourceLineCacheStart+UpperBound && 803 SourceLineCacheStart[UpperBound] >= QueriedFilePos) 804 SourceLineCacheEnd = SourceLineCacheStart+UpperBound; 805 } 806 807 unsigned *Pos 808 = std::lower_bound(SourceLineCache, SourceLineCacheEnd, QueriedFilePos); 809 unsigned LineNo = Pos-SourceLineCacheStart; 810 811 LastLineNoFileIDQuery = FID; 812 LastLineNoContentCache = Content; 813 LastLineNoFilePos = QueriedFilePos; 814 LastLineNoResult = LineNo; 815 return LineNo; 816 } 817 818 unsigned SourceManager::getInstantiationLineNumber(SourceLocation Loc) const { 819 if (Loc.isInvalid()) return 0; 820 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 821 return getLineNumber(LocInfo.first, LocInfo.second); 822 } 823 unsigned SourceManager::getSpellingLineNumber(SourceLocation Loc) const { 824 if (Loc.isInvalid()) return 0; 825 std::pair<FileID, unsigned> LocInfo = getDecomposedSpellingLoc(Loc); 826 return getLineNumber(LocInfo.first, LocInfo.second); 827 } 828 829 /// getFileCharacteristic - return the file characteristic of the specified 830 /// source location, indicating whether this is a normal file, a system 831 /// header, or an "implicit extern C" system header. 832 /// 833 /// This state can be modified with flags on GNU linemarker directives like: 834 /// # 4 "foo.h" 3 835 /// which changes all source locations in the current file after that to be 836 /// considered to be from a system header. 837 SrcMgr::CharacteristicKind 838 SourceManager::getFileCharacteristic(SourceLocation Loc) const { 839 assert(!Loc.isInvalid() && "Can't get file characteristic of invalid loc!"); 840 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 841 const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile(); 842 843 // If there are no #line directives in this file, just return the whole-file 844 // state. 845 if (!FI.hasLineDirectives()) 846 return FI.getFileCharacteristic(); 847 848 assert(LineTable && "Can't have linetable entries without a LineTable!"); 849 // See if there is a #line directive before the location. 850 const LineEntry *Entry = 851 LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second); 852 853 // If this is before the first line marker, use the file characteristic. 854 if (!Entry) 855 return FI.getFileCharacteristic(); 856 857 return Entry->FileKind; 858 } 859 860 /// Return the filename or buffer identifier of the buffer the location is in. 861 /// Note that this name does not respect #line directives. Use getPresumedLoc 862 /// for normal clients. 863 const char *SourceManager::getBufferName(SourceLocation Loc) const { 864 if (Loc.isInvalid()) return "<invalid loc>"; 865 866 return getBuffer(getFileID(Loc))->getBufferIdentifier(); 867 } 868 869 870 /// getPresumedLoc - This method returns the "presumed" location of a 871 /// SourceLocation specifies. A "presumed location" can be modified by #line 872 /// or GNU line marker directives. This provides a view on the data that a 873 /// user should see in diagnostics, for example. 874 /// 875 /// Note that a presumed location is always given as the instantiation point 876 /// of an instantiation location, not at the spelling location. 877 PresumedLoc SourceManager::getPresumedLoc(SourceLocation Loc) const { 878 if (Loc.isInvalid()) return PresumedLoc(); 879 880 // Presumed locations are always for instantiation points. 881 std::pair<FileID, unsigned> LocInfo = getDecomposedInstantiationLoc(Loc); 882 883 const SrcMgr::FileInfo &FI = getSLocEntry(LocInfo.first).getFile(); 884 const SrcMgr::ContentCache *C = FI.getContentCache(); 885 886 // To get the source name, first consult the FileEntry (if one exists) 887 // before the MemBuffer as this will avoid unnecessarily paging in the 888 // MemBuffer. 889 const char *Filename = 890 C->Entry ? C->Entry->getName() : C->getBuffer()->getBufferIdentifier(); 891 unsigned LineNo = getLineNumber(LocInfo.first, LocInfo.second); 892 unsigned ColNo = getColumnNumber(LocInfo.first, LocInfo.second); 893 SourceLocation IncludeLoc = FI.getIncludeLoc(); 894 895 // If we have #line directives in this file, update and overwrite the physical 896 // location info if appropriate. 897 if (FI.hasLineDirectives()) { 898 assert(LineTable && "Can't have linetable entries without a LineTable!"); 899 // See if there is a #line directive before this. If so, get it. 900 if (const LineEntry *Entry = 901 LineTable->FindNearestLineEntry(LocInfo.first.ID, LocInfo.second)) { 902 // If the LineEntry indicates a filename, use it. 903 if (Entry->FilenameID != -1) 904 Filename = LineTable->getFilename(Entry->FilenameID); 905 906 // Use the line number specified by the LineEntry. This line number may 907 // be multiple lines down from the line entry. Add the difference in 908 // physical line numbers from the query point and the line marker to the 909 // total. 910 unsigned MarkerLineNo = getLineNumber(LocInfo.first, Entry->FileOffset); 911 LineNo = Entry->LineNo + (LineNo-MarkerLineNo-1); 912 913 // Note that column numbers are not molested by line markers. 914 915 // Handle virtual #include manipulation. 916 if (Entry->IncludeOffset) { 917 IncludeLoc = getLocForStartOfFile(LocInfo.first); 918 IncludeLoc = IncludeLoc.getFileLocWithOffset(Entry->IncludeOffset); 919 } 920 } 921 } 922 923 return PresumedLoc(Filename, LineNo, ColNo, IncludeLoc); 924 } 925 926 //===----------------------------------------------------------------------===// 927 // Other miscellaneous methods. 928 //===----------------------------------------------------------------------===// 929 930 931 /// PrintStats - Print statistics to stderr. 932 /// 933 void SourceManager::PrintStats() const { 934 llvm::cerr << "\n*** Source Manager Stats:\n"; 935 llvm::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size() 936 << " mem buffers mapped.\n"; 937 llvm::cerr << SLocEntryTable.size() << " SLocEntry's allocated, " 938 << NextOffset << "B of Sloc address space used.\n"; 939 940 unsigned NumLineNumsComputed = 0; 941 unsigned NumFileBytesMapped = 0; 942 for (fileinfo_iterator I = fileinfo_begin(), E = fileinfo_end(); I != E; ++I){ 943 NumLineNumsComputed += I->second->SourceLineCache != 0; 944 NumFileBytesMapped += I->second->getSizeBytesMapped(); 945 } 946 947 llvm::cerr << NumFileBytesMapped << " bytes of files mapped, " 948 << NumLineNumsComputed << " files with line #'s computed.\n"; 949 llvm::cerr << "FileID scans: " << NumLinearScans << " linear, " 950 << NumBinaryProbes << " binary.\n"; 951 } 952 953 //===----------------------------------------------------------------------===// 954 // Serialization. 955 //===----------------------------------------------------------------------===// 956 957 void ContentCache::Emit(llvm::Serializer& S) const { 958 S.FlushRecord(); 959 S.EmitPtr(this); 960 961 if (Entry) { 962 llvm::sys::Path Fname(Buffer->getBufferIdentifier()); 963 964 if (Fname.isAbsolute()) 965 S.EmitCStr(Fname.c_str()); 966 else { 967 // Create an absolute path. 968 // FIXME: This will potentially contain ".." and "." in the path. 969 llvm::sys::Path path = llvm::sys::Path::GetCurrentDirectory(); 970 path.appendComponent(Fname.c_str()); 971 S.EmitCStr(path.c_str()); 972 } 973 } 974 else { 975 const char* p = Buffer->getBufferStart(); 976 const char* e = Buffer->getBufferEnd(); 977 978 S.EmitInt(e-p); 979 980 for ( ; p != e; ++p) 981 S.EmitInt(*p); 982 } 983 984 S.FlushRecord(); 985 } 986 987 void ContentCache::ReadToSourceManager(llvm::Deserializer& D, 988 SourceManager& SMgr, 989 FileManager* FMgr, 990 std::vector<char>& Buf) { 991 if (FMgr) { 992 llvm::SerializedPtrID PtrID = D.ReadPtrID(); 993 D.ReadCStr(Buf,false); 994 995 // Create/fetch the FileEntry. 996 const char* start = &Buf[0]; 997 const FileEntry* E = FMgr->getFile(start,start+Buf.size()); 998 999 // FIXME: Ideally we want a lazy materialization of the ContentCache 1000 // anyway, because we don't want to read in source files unless this 1001 // is absolutely needed. 1002 if (!E) 1003 D.RegisterPtr(PtrID,NULL); 1004 else 1005 // Get the ContextCache object and register it with the deserializer. 1006 D.RegisterPtr(PtrID, SMgr.getOrCreateContentCache(E)); 1007 return; 1008 } 1009 1010 // Register the ContextCache object with the deserializer. 1011 /* FIXME: 1012 ContentCache *Entry 1013 SMgr.MemBufferInfos.push_back(ContentCache()); 1014 = const_cast<ContentCache&>(SMgr.MemBufferInfos.back()); 1015 D.RegisterPtr(&Entry); 1016 1017 // Create the buffer. 1018 unsigned Size = D.ReadInt(); 1019 Entry.Buffer = MemoryBuffer::getNewUninitMemBuffer(Size); 1020 1021 // Read the contents of the buffer. 1022 char* p = const_cast<char*>(Entry.Buffer->getBufferStart()); 1023 for (unsigned i = 0; i < Size ; ++i) 1024 p[i] = D.ReadInt(); 1025 */ 1026 } 1027 1028 void SourceManager::Emit(llvm::Serializer& S) const { 1029 S.EnterBlock(); 1030 S.EmitPtr(this); 1031 S.EmitInt(MainFileID.getOpaqueValue()); 1032 1033 // Emit: FileInfos. Just emit the file name. 1034 S.EnterBlock(); 1035 1036 // FIXME: Emit FileInfos. 1037 //std::for_each(FileInfos.begin(), FileInfos.end(), 1038 // S.MakeEmitter<ContentCache>()); 1039 1040 S.ExitBlock(); 1041 1042 // Emit: MemBufferInfos 1043 S.EnterBlock(); 1044 1045 /* FIXME: EMIT. 1046 std::for_each(MemBufferInfos.begin(), MemBufferInfos.end(), 1047 S.MakeEmitter<ContentCache>()); 1048 */ 1049 1050 S.ExitBlock(); 1051 1052 // FIXME: Emit SLocEntryTable. 1053 1054 S.ExitBlock(); 1055 } 1056 1057 SourceManager* 1058 SourceManager::CreateAndRegister(llvm::Deserializer &D, FileManager &FMgr) { 1059 SourceManager *M = new SourceManager(); 1060 D.RegisterPtr(M); 1061 1062 // Read: the FileID of the main source file of the translation unit. 1063 M->MainFileID = FileID::get(D.ReadInt()); 1064 1065 std::vector<char> Buf; 1066 1067 /*{ // FIXME Read: FileInfos. 1068 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 1069 while (!D.FinishedBlock(BLoc)) 1070 ContentCache::ReadToSourceManager(D,*M,&FMgr,Buf); 1071 }*/ 1072 1073 { // Read: MemBufferInfos. 1074 llvm::Deserializer::Location BLoc = D.getCurrentBlockLocation(); 1075 while (!D.FinishedBlock(BLoc)) 1076 ContentCache::ReadToSourceManager(D,*M,NULL,Buf); 1077 } 1078 1079 // FIXME: Read SLocEntryTable. 1080 1081 return M; 1082 } 1083