xref: /llvm-project/llvm/lib/Support/CachePruning.cpp (revision 481d224b67fc6d9d1c1cf7e2164b5b4212523cb8)
1 //===-CachePruning.cpp - LLVM Cache Directory Pruning ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the pruning of a directory based on least recently used.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Support/CachePruning.h"
15 
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/Errc.h"
18 #include "llvm/Support/Error.h"
19 #include "llvm/Support/FileSystem.h"
20 #include "llvm/Support/Path.h"
21 #include "llvm/Support/raw_ostream.h"
22 
23 #define DEBUG_TYPE "cache-pruning"
24 
25 #include <set>
26 #include <system_error>
27 
28 using namespace llvm;
29 
30 namespace {
31 struct FileInfo {
32   sys::TimePoint<> Time;
33   uint64_t Size;
34   std::string Path;
35 
36   /// Used to determine which files to prune first. Also used to determine
37   /// set membership, so must take into account all fields.
38   bool operator<(const FileInfo &Other) const {
39     if (Time < Other.Time)
40       return true;
41     else if (Other.Time < Time)
42       return false;
43     if (Other.Size < Size)
44       return true;
45     else if (Size < Other.Size)
46       return false;
47     return Path < Other.Path;
48   }
49 };
50 } // anonymous namespace
51 
52 /// Write a new timestamp file with the given path. This is used for the pruning
53 /// interval option.
54 static void writeTimestampFile(StringRef TimestampFile) {
55   std::error_code EC;
56   raw_fd_ostream Out(TimestampFile.str(), EC, sys::fs::F_None);
57 }
58 
59 static Expected<std::chrono::seconds> parseDuration(StringRef Duration) {
60   if (Duration.empty())
61     return make_error<StringError>("Duration must not be empty",
62                                    inconvertibleErrorCode());
63 
64   StringRef NumStr = Duration.slice(0, Duration.size()-1);
65   uint64_t Num;
66   if (NumStr.getAsInteger(0, Num))
67     return make_error<StringError>("'" + NumStr + "' not an integer",
68                                    inconvertibleErrorCode());
69 
70   switch (Duration.back()) {
71   case 's':
72     return std::chrono::seconds(Num);
73   case 'm':
74     return std::chrono::minutes(Num);
75   case 'h':
76     return std::chrono::hours(Num);
77   default:
78     return make_error<StringError>("'" + Duration +
79                                        "' must end with one of 's', 'm' or 'h'",
80                                    inconvertibleErrorCode());
81   }
82 }
83 
84 Expected<CachePruningPolicy>
85 llvm::parseCachePruningPolicy(StringRef PolicyStr) {
86   CachePruningPolicy Policy;
87   std::pair<StringRef, StringRef> P = {"", PolicyStr};
88   while (!P.second.empty()) {
89     P = P.second.split(':');
90 
91     StringRef Key, Value;
92     std::tie(Key, Value) = P.first.split('=');
93     if (Key == "prune_interval") {
94       auto DurationOrErr = parseDuration(Value);
95       if (!DurationOrErr)
96         return DurationOrErr.takeError();
97       Policy.Interval = *DurationOrErr;
98     } else if (Key == "prune_after") {
99       auto DurationOrErr = parseDuration(Value);
100       if (!DurationOrErr)
101         return DurationOrErr.takeError();
102       Policy.Expiration = *DurationOrErr;
103     } else if (Key == "cache_size") {
104       if (Value.back() != '%')
105         return make_error<StringError>("'" + Value + "' must be a percentage",
106                                        inconvertibleErrorCode());
107       StringRef SizeStr = Value.drop_back();
108       uint64_t Size;
109       if (SizeStr.getAsInteger(0, Size))
110         return make_error<StringError>("'" + SizeStr + "' not an integer",
111                                        inconvertibleErrorCode());
112       if (Size > 100)
113         return make_error<StringError>("'" + SizeStr +
114                                            "' must be between 0 and 100",
115                                        inconvertibleErrorCode());
116       Policy.MaxSizePercentageOfAvailableSpace = Size;
117     } else if (Key == "cache_size_bytes") {
118       uint64_t Mult = 1;
119       switch (tolower(Value.back())) {
120       case 'k':
121         Mult = 1024;
122         Value = Value.drop_back();
123         break;
124       case 'm':
125         Mult = 1024 * 1024;
126         Value = Value.drop_back();
127         break;
128       case 'g':
129         Mult = 1024 * 1024 * 1024;
130         Value = Value.drop_back();
131         break;
132       }
133       uint64_t Size;
134       if (Value.getAsInteger(0, Size))
135         return make_error<StringError>("'" + Value + "' not an integer",
136                                        inconvertibleErrorCode());
137       Policy.MaxSizeBytes = Size * Mult;
138     } else if (Key == "cache_size_files") {
139       if (Value.getAsInteger(0, Policy.MaxSizeFiles))
140         return make_error<StringError>("'" + Value + "' not an integer",
141                                        inconvertibleErrorCode());
142     } else {
143       return make_error<StringError>("Unknown key: '" + Key + "'",
144                                      inconvertibleErrorCode());
145     }
146   }
147 
148   return Policy;
149 }
150 
151 /// Prune the cache of files that haven't been accessed in a long time.
152 bool llvm::pruneCache(StringRef Path, CachePruningPolicy Policy) {
153   using namespace std::chrono;
154 
155   if (Path.empty())
156     return false;
157 
158   bool isPathDir;
159   if (sys::fs::is_directory(Path, isPathDir))
160     return false;
161 
162   if (!isPathDir)
163     return false;
164 
165   Policy.MaxSizePercentageOfAvailableSpace =
166       std::min(Policy.MaxSizePercentageOfAvailableSpace, 100u);
167 
168   if (Policy.Expiration == seconds(0) &&
169       Policy.MaxSizePercentageOfAvailableSpace == 0 &&
170       Policy.MaxSizeBytes == 0 && Policy.MaxSizeFiles == 0) {
171     LLVM_DEBUG(dbgs() << "No pruning settings set, exit early\n");
172     // Nothing will be pruned, early exit
173     return false;
174   }
175 
176   // Try to stat() the timestamp file.
177   SmallString<128> TimestampFile(Path);
178   sys::path::append(TimestampFile, "llvmcache.timestamp");
179   sys::fs::file_status FileStatus;
180   const auto CurrentTime = system_clock::now();
181   if (auto EC = sys::fs::status(TimestampFile, FileStatus)) {
182     if (EC == errc::no_such_file_or_directory) {
183       // If the timestamp file wasn't there, create one now.
184       writeTimestampFile(TimestampFile);
185     } else {
186       // Unknown error?
187       return false;
188     }
189   } else {
190     if (!Policy.Interval)
191       return false;
192     if (Policy.Interval != seconds(0)) {
193       // Check whether the time stamp is older than our pruning interval.
194       // If not, do nothing.
195       const auto TimeStampModTime = FileStatus.getLastModificationTime();
196       auto TimeStampAge = CurrentTime - TimeStampModTime;
197       if (TimeStampAge <= *Policy.Interval) {
198         LLVM_DEBUG(dbgs() << "Timestamp file too recent ("
199                           << duration_cast<seconds>(TimeStampAge).count()
200                           << "s old), do not prune.\n");
201         return false;
202       }
203     }
204     // Write a new timestamp file so that nobody else attempts to prune.
205     // There is a benign race condition here, if two processes happen to
206     // notice at the same time that the timestamp is out-of-date.
207     writeTimestampFile(TimestampFile);
208   }
209 
210   // Keep track of files to delete to get below the size limit.
211   // Order by time of last use so that recently used files are preserved.
212   std::set<FileInfo> FileInfos;
213   uint64_t TotalSize = 0;
214 
215   // Walk the entire directory cache, looking for unused files.
216   std::error_code EC;
217   SmallString<128> CachePathNative;
218   sys::path::native(Path, CachePathNative);
219   // Walk all of the files within this directory.
220   for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd;
221        File != FileEnd && !EC; File.increment(EC)) {
222     // Ignore any files not beginning with the string "llvmcache-". This
223     // includes the timestamp file as well as any files created by the user.
224     // This acts as a safeguard against data loss if the user specifies the
225     // wrong directory as their cache directory.
226     if (!sys::path::filename(File->path()).startswith("llvmcache-"))
227       continue;
228 
229     // Look at this file. If we can't stat it, there's nothing interesting
230     // there.
231     ErrorOr<sys::fs::basic_file_status> StatusOrErr = File->status();
232     if (!StatusOrErr) {
233       LLVM_DEBUG(dbgs() << "Ignore " << File->path() << " (can't stat)\n");
234       continue;
235     }
236 
237     // If the file hasn't been used recently enough, delete it
238     const auto FileAccessTime = StatusOrErr->getLastAccessedTime();
239     auto FileAge = CurrentTime - FileAccessTime;
240     if (Policy.Expiration != seconds(0) && FileAge > Policy.Expiration) {
241       LLVM_DEBUG(dbgs() << "Remove " << File->path() << " ("
242                         << duration_cast<seconds>(FileAge).count()
243                         << "s old)\n");
244       sys::fs::remove(File->path());
245       continue;
246     }
247 
248     // Leave it here for now, but add it to the list of size-based pruning.
249     TotalSize += StatusOrErr->getSize();
250     FileInfos.insert({FileAccessTime, StatusOrErr->getSize(), File->path()});
251   }
252 
253   auto FileInfo = FileInfos.begin();
254   size_t NumFiles = FileInfos.size();
255 
256   auto RemoveCacheFile = [&]() {
257     // Remove the file.
258     sys::fs::remove(FileInfo->Path);
259     // Update size
260     TotalSize -= FileInfo->Size;
261     NumFiles--;
262     LLVM_DEBUG(dbgs() << " - Remove " << FileInfo->Path << " (size "
263                       << FileInfo->Size << "), new occupancy is " << TotalSize
264                       << "%\n");
265     ++FileInfo;
266   };
267 
268   // Prune for number of files.
269   if (Policy.MaxSizeFiles)
270     while (NumFiles > Policy.MaxSizeFiles)
271       RemoveCacheFile();
272 
273   // Prune for size now if needed
274   if (Policy.MaxSizePercentageOfAvailableSpace > 0 || Policy.MaxSizeBytes > 0) {
275     auto ErrOrSpaceInfo = sys::fs::disk_space(Path);
276     if (!ErrOrSpaceInfo) {
277       report_fatal_error("Can't get available size");
278     }
279     sys::fs::space_info SpaceInfo = ErrOrSpaceInfo.get();
280     auto AvailableSpace = TotalSize + SpaceInfo.free;
281 
282     if (Policy.MaxSizePercentageOfAvailableSpace == 0)
283       Policy.MaxSizePercentageOfAvailableSpace = 100;
284     if (Policy.MaxSizeBytes == 0)
285       Policy.MaxSizeBytes = AvailableSpace;
286     auto TotalSizeTarget = std::min<uint64_t>(
287         AvailableSpace * Policy.MaxSizePercentageOfAvailableSpace / 100ull,
288         Policy.MaxSizeBytes);
289 
290     LLVM_DEBUG(dbgs() << "Occupancy: " << ((100 * TotalSize) / AvailableSpace)
291                       << "% target is: "
292                       << Policy.MaxSizePercentageOfAvailableSpace << "%, "
293                       << Policy.MaxSizeBytes << " bytes\n");
294 
295     // Remove the oldest accessed files first, till we get below the threshold.
296     while (TotalSize > TotalSizeTarget && FileInfo != FileInfos.end())
297       RemoveCacheFile();
298   }
299   return true;
300 }
301