//===-- DataFileCache.cpp -------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
80eae32dcSDimitry Andric
90eae32dcSDimitry Andric #include "lldb/Core/DataFileCache.h"
100eae32dcSDimitry Andric #include "lldb/Core/Module.h"
110eae32dcSDimitry Andric #include "lldb/Core/ModuleList.h"
120eae32dcSDimitry Andric #include "lldb/Host/FileSystem.h"
130eae32dcSDimitry Andric #include "lldb/Symbol/ObjectFile.h"
140eae32dcSDimitry Andric #include "lldb/Utility/DataEncoder.h"
1581ad6265SDimitry Andric #include "lldb/Utility/LLDBLog.h"
160eae32dcSDimitry Andric #include "lldb/Utility/Log.h"
170eae32dcSDimitry Andric #include "llvm/Support/CachePruning.h"
180eae32dcSDimitry Andric
190eae32dcSDimitry Andric using namespace lldb_private;
200eae32dcSDimitry Andric
210eae32dcSDimitry Andric
GetLLDBIndexCachePolicy()22bdd1243dSDimitry Andric llvm::CachePruningPolicy DataFileCache::GetLLDBIndexCachePolicy() {
23bdd1243dSDimitry Andric static llvm::CachePruningPolicy policy;
24bdd1243dSDimitry Andric static llvm::once_flag once_flag;
25bdd1243dSDimitry Andric
26bdd1243dSDimitry Andric llvm::call_once(once_flag, []() {
27bdd1243dSDimitry Andric // Prune the cache based off of the LLDB settings each time we create a
28bdd1243dSDimitry Andric // cache object.
290eae32dcSDimitry Andric ModuleListProperties &properties =
300eae32dcSDimitry Andric ModuleList::GetGlobalModuleListProperties();
310eae32dcSDimitry Andric // Only scan once an hour. If we have lots of debug sessions we don't want
320eae32dcSDimitry Andric // to scan this directory too often. A timestamp file is written to the
33bdd1243dSDimitry Andric // directory to ensure different processes don't scan the directory too
34bdd1243dSDimitry Andric // often. This setting doesn't mean that a thread will continually scan the
35bdd1243dSDimitry Andric // cache directory within this process.
360eae32dcSDimitry Andric policy.Interval = std::chrono::hours(1);
370eae32dcSDimitry Andric // Get the user settings for pruning.
380eae32dcSDimitry Andric policy.MaxSizeBytes = properties.GetLLDBIndexCacheMaxByteSize();
390eae32dcSDimitry Andric policy.MaxSizePercentageOfAvailableSpace =
400eae32dcSDimitry Andric properties.GetLLDBIndexCacheMaxPercent();
410eae32dcSDimitry Andric policy.Expiration =
420eae32dcSDimitry Andric std::chrono::hours(properties.GetLLDBIndexCacheExpirationDays() * 24);
43bdd1243dSDimitry Andric });
44bdd1243dSDimitry Andric return policy;
45bdd1243dSDimitry Andric }
46bdd1243dSDimitry Andric
DataFileCache(llvm::StringRef path,llvm::CachePruningPolicy policy)47bdd1243dSDimitry Andric DataFileCache::DataFileCache(llvm::StringRef path, llvm::CachePruningPolicy policy) {
48bdd1243dSDimitry Andric m_cache_dir.SetPath(path);
490eae32dcSDimitry Andric pruneCache(path, policy);
500eae32dcSDimitry Andric
510eae32dcSDimitry Andric // This lambda will get called when the data is gotten from the cache and
520eae32dcSDimitry Andric // also after the data was set for a given key. We only need to take
530eae32dcSDimitry Andric // ownership of the data if we are geting the data, so we use the
540eae32dcSDimitry Andric // m_take_ownership member variable to indicate if we need to take
550eae32dcSDimitry Andric // ownership.
560eae32dcSDimitry Andric
57bdd1243dSDimitry Andric auto add_buffer = [this](unsigned task, const llvm::Twine &moduleName,
58bdd1243dSDimitry Andric std::unique_ptr<llvm::MemoryBuffer> m) {
590eae32dcSDimitry Andric if (m_take_ownership)
600eae32dcSDimitry Andric m_mem_buff_up = std::move(m);
610eae32dcSDimitry Andric };
6281ad6265SDimitry Andric llvm::Expected<llvm::FileCache> cache_or_err =
630eae32dcSDimitry Andric llvm::localCache("LLDBModuleCache", "lldb-module", path, add_buffer);
640eae32dcSDimitry Andric if (cache_or_err)
650eae32dcSDimitry Andric m_cache_callback = std::move(*cache_or_err);
660eae32dcSDimitry Andric else {
6781ad6265SDimitry Andric Log *log = GetLog(LLDBLog::Modules);
680eae32dcSDimitry Andric LLDB_LOG_ERROR(log, cache_or_err.takeError(),
690eae32dcSDimitry Andric "failed to create lldb index cache directory: {0}");
700eae32dcSDimitry Andric }
710eae32dcSDimitry Andric }
720eae32dcSDimitry Andric
730eae32dcSDimitry Andric std::unique_ptr<llvm::MemoryBuffer>
GetCachedData(llvm::StringRef key)7481ad6265SDimitry Andric DataFileCache::GetCachedData(llvm::StringRef key) {
750eae32dcSDimitry Andric std::lock_guard<std::mutex> guard(m_mutex);
760eae32dcSDimitry Andric
770eae32dcSDimitry Andric const unsigned task = 1;
780eae32dcSDimitry Andric m_take_ownership = true;
790eae32dcSDimitry Andric // If we call the "m_cache_callback" function and the data is cached, it will
800eae32dcSDimitry Andric // call the "add_buffer" lambda function from the constructor which will in
810eae32dcSDimitry Andric // turn take ownership of the member buffer that is passed to the callback and
820eae32dcSDimitry Andric // put it into a member variable.
8381ad6265SDimitry Andric llvm::Expected<llvm::AddStreamFn> add_stream_or_err =
84bdd1243dSDimitry Andric m_cache_callback(task, key, "");
850eae32dcSDimitry Andric m_take_ownership = false;
860eae32dcSDimitry Andric // At this point we either already called the "add_buffer" lambda with
870eae32dcSDimitry Andric // the data or we haven't. We can tell if we got the cached data by checking
880eae32dcSDimitry Andric // the add_stream function pointer value below.
890eae32dcSDimitry Andric if (add_stream_or_err) {
9081ad6265SDimitry Andric llvm::AddStreamFn &add_stream = *add_stream_or_err;
910eae32dcSDimitry Andric // If the "add_stream" is nullptr, then the data was cached and we already
920eae32dcSDimitry Andric // called the "add_buffer" lambda. If it is valid, then if we were to call
930eae32dcSDimitry Andric // the add_stream function it would cause a cache file to get generated
940eae32dcSDimitry Andric // and we would be expected to fill in the data. In this function we only
950eae32dcSDimitry Andric // want to check if the data was cached, so we don't want to call
960eae32dcSDimitry Andric // "add_stream" in this function.
970eae32dcSDimitry Andric if (!add_stream)
980eae32dcSDimitry Andric return std::move(m_mem_buff_up);
990eae32dcSDimitry Andric } else {
10081ad6265SDimitry Andric Log *log = GetLog(LLDBLog::Modules);
1010eae32dcSDimitry Andric LLDB_LOG_ERROR(log, add_stream_or_err.takeError(),
1020eae32dcSDimitry Andric "failed to get the cache add stream callback for key: {0}");
1030eae32dcSDimitry Andric }
1040eae32dcSDimitry Andric // Data was not cached.
1050eae32dcSDimitry Andric return std::unique_ptr<llvm::MemoryBuffer>();
1060eae32dcSDimitry Andric }
1070eae32dcSDimitry Andric
SetCachedData(llvm::StringRef key,llvm::ArrayRef<uint8_t> data)10881ad6265SDimitry Andric bool DataFileCache::SetCachedData(llvm::StringRef key,
10981ad6265SDimitry Andric llvm::ArrayRef<uint8_t> data) {
1100eae32dcSDimitry Andric std::lock_guard<std::mutex> guard(m_mutex);
1110eae32dcSDimitry Andric const unsigned task = 2;
1120eae32dcSDimitry Andric // If we call this function and the data is cached, it will call the
1130eae32dcSDimitry Andric // add_buffer lambda function from the constructor which will ignore the
1140eae32dcSDimitry Andric // data.
11581ad6265SDimitry Andric llvm::Expected<llvm::AddStreamFn> add_stream_or_err =
116bdd1243dSDimitry Andric m_cache_callback(task, key, "");
1170eae32dcSDimitry Andric // If we reach this code then we either already called the callback with
1180eae32dcSDimitry Andric // the data or we haven't. We can tell if we had the cached data by checking
1190eae32dcSDimitry Andric // the CacheAddStream function pointer value below.
1200eae32dcSDimitry Andric if (add_stream_or_err) {
12181ad6265SDimitry Andric llvm::AddStreamFn &add_stream = *add_stream_or_err;
1220eae32dcSDimitry Andric // If the "add_stream" is nullptr, then the data was cached. If it is
1230eae32dcSDimitry Andric // valid, then if we call the add_stream function with a task it will
1240eae32dcSDimitry Andric // cause the file to get generated, but we only want to check if the data
1250eae32dcSDimitry Andric // is cached here, so we don't want to call it here. Note that the
1260eae32dcSDimitry Andric // add_buffer will also get called in this case after the data has been
1270eae32dcSDimitry Andric // provided, but we won't take ownership of the memory buffer as we just
1280eae32dcSDimitry Andric // want to write the data.
1290eae32dcSDimitry Andric if (add_stream) {
13081ad6265SDimitry Andric llvm::Expected<std::unique_ptr<llvm::CachedFileStream>> file_or_err =
131bdd1243dSDimitry Andric add_stream(task, "");
1320eae32dcSDimitry Andric if (file_or_err) {
13381ad6265SDimitry Andric llvm::CachedFileStream *cfs = file_or_err->get();
1340eae32dcSDimitry Andric cfs->OS->write((const char *)data.data(), data.size());
1350eae32dcSDimitry Andric return true;
1360eae32dcSDimitry Andric } else {
13781ad6265SDimitry Andric Log *log = GetLog(LLDBLog::Modules);
1380eae32dcSDimitry Andric LLDB_LOG_ERROR(log, file_or_err.takeError(),
1390eae32dcSDimitry Andric "failed to get the cache file stream for key: {0}");
1400eae32dcSDimitry Andric }
1410eae32dcSDimitry Andric }
1420eae32dcSDimitry Andric } else {
14381ad6265SDimitry Andric Log *log = GetLog(LLDBLog::Modules);
1440eae32dcSDimitry Andric LLDB_LOG_ERROR(log, add_stream_or_err.takeError(),
1450eae32dcSDimitry Andric "failed to get the cache add stream callback for key: {0}");
1460eae32dcSDimitry Andric }
1470eae32dcSDimitry Andric return false;
1480eae32dcSDimitry Andric }
1490eae32dcSDimitry Andric
GetCacheFilePath(llvm::StringRef key)1500eae32dcSDimitry Andric FileSpec DataFileCache::GetCacheFilePath(llvm::StringRef key) {
1510eae32dcSDimitry Andric FileSpec cache_file(m_cache_dir);
1520eae32dcSDimitry Andric std::string filename("llvmcache-");
1530eae32dcSDimitry Andric filename += key.str();
1540eae32dcSDimitry Andric cache_file.AppendPathComponent(filename);
1550eae32dcSDimitry Andric return cache_file;
1560eae32dcSDimitry Andric }
1570eae32dcSDimitry Andric
RemoveCacheFile(llvm::StringRef key)1580eae32dcSDimitry Andric Status DataFileCache::RemoveCacheFile(llvm::StringRef key) {
1590eae32dcSDimitry Andric FileSpec cache_file = GetCacheFilePath(key);
1600eae32dcSDimitry Andric FileSystem &fs = FileSystem::Instance();
1610eae32dcSDimitry Andric if (!fs.Exists(cache_file))
1620eae32dcSDimitry Andric return Status();
1630eae32dcSDimitry Andric return fs.RemoveFile(cache_file);
1640eae32dcSDimitry Andric }
1650eae32dcSDimitry Andric
CacheSignature(lldb_private::Module * module)1660eae32dcSDimitry Andric CacheSignature::CacheSignature(lldb_private::Module *module) {
1670eae32dcSDimitry Andric Clear();
1680eae32dcSDimitry Andric UUID uuid = module->GetUUID();
1690eae32dcSDimitry Andric if (uuid.IsValid())
1700eae32dcSDimitry Andric m_uuid = uuid;
1710eae32dcSDimitry Andric
1720eae32dcSDimitry Andric std::time_t mod_time = 0;
1730eae32dcSDimitry Andric mod_time = llvm::sys::toTimeT(module->GetModificationTime());
1740eae32dcSDimitry Andric if (mod_time != 0)
1750eae32dcSDimitry Andric m_mod_time = mod_time;
1760eae32dcSDimitry Andric
1770eae32dcSDimitry Andric mod_time = llvm::sys::toTimeT(module->GetObjectModificationTime());
1780eae32dcSDimitry Andric if (mod_time != 0)
1790eae32dcSDimitry Andric m_obj_mod_time = mod_time;
1800eae32dcSDimitry Andric }
1810eae32dcSDimitry Andric
CacheSignature(lldb_private::ObjectFile * objfile)1820eae32dcSDimitry Andric CacheSignature::CacheSignature(lldb_private::ObjectFile *objfile) {
1830eae32dcSDimitry Andric Clear();
1840eae32dcSDimitry Andric UUID uuid = objfile->GetUUID();
1850eae32dcSDimitry Andric if (uuid.IsValid())
1860eae32dcSDimitry Andric m_uuid = uuid;
1870eae32dcSDimitry Andric
1880eae32dcSDimitry Andric std::time_t mod_time = 0;
1890eae32dcSDimitry Andric // Grab the modification time of the object file's file. It isn't always the
1900eae32dcSDimitry Andric // same as the module's file when you have a executable file as the main
1910eae32dcSDimitry Andric // executable, and you have a object file for a symbol file.
1920eae32dcSDimitry Andric FileSystem &fs = FileSystem::Instance();
1930eae32dcSDimitry Andric mod_time = llvm::sys::toTimeT(fs.GetModificationTime(objfile->GetFileSpec()));
1940eae32dcSDimitry Andric if (mod_time != 0)
1950eae32dcSDimitry Andric m_mod_time = mod_time;
1960eae32dcSDimitry Andric
1970eae32dcSDimitry Andric mod_time =
1980eae32dcSDimitry Andric llvm::sys::toTimeT(objfile->GetModule()->GetObjectModificationTime());
1990eae32dcSDimitry Andric if (mod_time != 0)
2000eae32dcSDimitry Andric m_obj_mod_time = mod_time;
2010eae32dcSDimitry Andric }
2020eae32dcSDimitry Andric
/// One-byte tags that introduce each field of an encoded CacheSignature.
enum SignatureEncoding {
  /// Followed by a one-byte length and that many UUID bytes.
  eSignatureUUID = 1u,
  /// Followed by a 32-bit modification time.
  eSignatureModTime = 2u,
  /// Followed by a 32-bit object modification time.
  eSignatureObjectModTime = 3u,
  /// Terminates the encoded signature.
  eSignatureEnd = 0xFFu,
};
2090eae32dcSDimitry Andric
Encode(DataEncoder & encoder) const21081ad6265SDimitry Andric bool CacheSignature::Encode(DataEncoder &encoder) const {
2110eae32dcSDimitry Andric if (!IsValid())
2120eae32dcSDimitry Andric return false; // Invalid signature, return false!
2130eae32dcSDimitry Andric
21481ad6265SDimitry Andric if (m_uuid) {
2150eae32dcSDimitry Andric llvm::ArrayRef<uint8_t> uuid_bytes = m_uuid->GetBytes();
2160eae32dcSDimitry Andric encoder.AppendU8(eSignatureUUID);
2170eae32dcSDimitry Andric encoder.AppendU8(uuid_bytes.size());
2180eae32dcSDimitry Andric encoder.AppendData(uuid_bytes);
2190eae32dcSDimitry Andric }
22081ad6265SDimitry Andric if (m_mod_time) {
2210eae32dcSDimitry Andric encoder.AppendU8(eSignatureModTime);
2220eae32dcSDimitry Andric encoder.AppendU32(*m_mod_time);
2230eae32dcSDimitry Andric }
22481ad6265SDimitry Andric if (m_obj_mod_time) {
2250eae32dcSDimitry Andric encoder.AppendU8(eSignatureObjectModTime);
2260eae32dcSDimitry Andric encoder.AppendU32(*m_obj_mod_time);
2270eae32dcSDimitry Andric }
2280eae32dcSDimitry Andric encoder.AppendU8(eSignatureEnd);
2290eae32dcSDimitry Andric return true;
2300eae32dcSDimitry Andric }
2310eae32dcSDimitry Andric
Decode(const lldb_private::DataExtractor & data,lldb::offset_t * offset_ptr)23281ad6265SDimitry Andric bool CacheSignature::Decode(const lldb_private::DataExtractor &data,
2330eae32dcSDimitry Andric lldb::offset_t *offset_ptr) {
2340eae32dcSDimitry Andric Clear();
2350eae32dcSDimitry Andric while (uint8_t sig_encoding = data.GetU8(offset_ptr)) {
2360eae32dcSDimitry Andric switch (sig_encoding) {
2370eae32dcSDimitry Andric case eSignatureUUID: {
2380eae32dcSDimitry Andric const uint8_t length = data.GetU8(offset_ptr);
2390eae32dcSDimitry Andric const uint8_t *bytes = (const uint8_t *)data.GetData(offset_ptr, length);
2400eae32dcSDimitry Andric if (bytes != nullptr && length > 0)
241bdd1243dSDimitry Andric m_uuid = UUID(llvm::ArrayRef<uint8_t>(bytes, length));
2420eae32dcSDimitry Andric } break;
2430eae32dcSDimitry Andric case eSignatureModTime: {
2440eae32dcSDimitry Andric uint32_t mod_time = data.GetU32(offset_ptr);
2450eae32dcSDimitry Andric if (mod_time > 0)
2460eae32dcSDimitry Andric m_mod_time = mod_time;
2470eae32dcSDimitry Andric } break;
2480eae32dcSDimitry Andric case eSignatureObjectModTime: {
2490eae32dcSDimitry Andric uint32_t mod_time = data.GetU32(offset_ptr);
2500eae32dcSDimitry Andric if (mod_time > 0)
25181ad6265SDimitry Andric m_obj_mod_time = mod_time;
2520eae32dcSDimitry Andric } break;
2530eae32dcSDimitry Andric case eSignatureEnd:
25481ad6265SDimitry Andric // The definition of is valid changed to only be valid if the UUID is
25581ad6265SDimitry Andric // valid so make sure that if we attempt to decode an old cache file
25681ad6265SDimitry Andric // that we will fail to decode the cache file if the signature isn't
25781ad6265SDimitry Andric // considered valid.
25881ad6265SDimitry Andric return IsValid();
2590eae32dcSDimitry Andric default:
2600eae32dcSDimitry Andric break;
2610eae32dcSDimitry Andric }
2620eae32dcSDimitry Andric }
2630eae32dcSDimitry Andric return false;
2640eae32dcSDimitry Andric }
2650eae32dcSDimitry Andric
Add(ConstString s)2660eae32dcSDimitry Andric uint32_t ConstStringTable::Add(ConstString s) {
2670eae32dcSDimitry Andric auto pos = m_string_to_offset.find(s);
2680eae32dcSDimitry Andric if (pos != m_string_to_offset.end())
2690eae32dcSDimitry Andric return pos->second;
2700eae32dcSDimitry Andric const uint32_t offset = m_next_offset;
2710eae32dcSDimitry Andric m_strings.push_back(s);
2720eae32dcSDimitry Andric m_string_to_offset[s] = offset;
2730eae32dcSDimitry Andric m_next_offset += s.GetLength() + 1;
2740eae32dcSDimitry Andric return offset;
2750eae32dcSDimitry Andric }
2760eae32dcSDimitry Andric
2770eae32dcSDimitry Andric static const llvm::StringRef kStringTableIdentifier("STAB");
2780eae32dcSDimitry Andric
Encode(DataEncoder & encoder)2790eae32dcSDimitry Andric bool ConstStringTable::Encode(DataEncoder &encoder) {
2800eae32dcSDimitry Andric // Write an 4 character code into the stream. This will help us when decoding
2810eae32dcSDimitry Andric // to make sure we find this identifier when decoding the string table to make
2820eae32dcSDimitry Andric // sure we have the rigth data. It also helps to identify the string table
2830eae32dcSDimitry Andric // when dumping the hex bytes in a cache file.
2840eae32dcSDimitry Andric encoder.AppendData(kStringTableIdentifier);
2850eae32dcSDimitry Andric size_t length_offset = encoder.GetByteSize();
2860eae32dcSDimitry Andric encoder.AppendU32(0); // Total length of all strings which will be fixed up.
2870eae32dcSDimitry Andric size_t strtab_offset = encoder.GetByteSize();
288*5f757f3fSDimitry Andric encoder.AppendU8(0); // Start the string table with an empty string.
2890eae32dcSDimitry Andric for (auto s: m_strings) {
2900eae32dcSDimitry Andric // Make sure all of the offsets match up with what we handed out!
2910eae32dcSDimitry Andric assert(m_string_to_offset.find(s)->second ==
2920eae32dcSDimitry Andric encoder.GetByteSize() - strtab_offset);
2930eae32dcSDimitry Andric // Append the C string into the encoder
2940eae32dcSDimitry Andric encoder.AppendCString(s.GetStringRef());
2950eae32dcSDimitry Andric }
2960eae32dcSDimitry Andric // Fixup the string table length.
2970eae32dcSDimitry Andric encoder.PutU32(length_offset, encoder.GetByteSize() - strtab_offset);
2980eae32dcSDimitry Andric return true;
2990eae32dcSDimitry Andric }
3000eae32dcSDimitry Andric
Decode(const lldb_private::DataExtractor & data,lldb::offset_t * offset_ptr)30181ad6265SDimitry Andric bool StringTableReader::Decode(const lldb_private::DataExtractor &data,
3020eae32dcSDimitry Andric lldb::offset_t *offset_ptr) {
3030eae32dcSDimitry Andric llvm::StringRef identifier((const char *)data.GetData(offset_ptr, 4), 4);
3040eae32dcSDimitry Andric if (identifier != kStringTableIdentifier)
3050eae32dcSDimitry Andric return false;
3060eae32dcSDimitry Andric const uint32_t length = data.GetU32(offset_ptr);
3070eae32dcSDimitry Andric // We always have at least one byte for the empty string at offset zero.
3080eae32dcSDimitry Andric if (length == 0)
3090eae32dcSDimitry Andric return false;
3100eae32dcSDimitry Andric const char *bytes = (const char *)data.GetData(offset_ptr, length);
3110eae32dcSDimitry Andric if (bytes == nullptr)
3120eae32dcSDimitry Andric return false;
31381ad6265SDimitry Andric m_data = llvm::StringRef(bytes, length);
3140eae32dcSDimitry Andric return true;
3150eae32dcSDimitry Andric }
3160eae32dcSDimitry Andric
Get(uint32_t offset) const31781ad6265SDimitry Andric llvm::StringRef StringTableReader::Get(uint32_t offset) const {
3180eae32dcSDimitry Andric if (offset >= m_data.size())
31981ad6265SDimitry Andric return llvm::StringRef();
32081ad6265SDimitry Andric return llvm::StringRef(m_data.data() + offset);
3210eae32dcSDimitry Andric }
322bdd1243dSDimitry Andric
323