1//===----------------------------------------------------------------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
10/// Encapsulates \p OnDiskGraphDB and \p OnDiskKeyValueDB instances within one
11/// directory while also restricting storage growth with a scheme of chaining
12/// the two most recent directories (primary & upstream), where the primary
13/// "faults-in" data from the upstream one. When the primary (most recent)
14/// directory exceeds its intended limit a new empty directory becomes the
17/// Within the top-level directory (the path that \p UnifiedOnDiskCache::open
18/// receives) there are directories named like this:
25/// 'version' is the version integer for this \p UnifiedOnDiskCache's scheme and
26/// the part after the dot is an increasing integer. The primary directory is
27/// the one with the highest integer and the upstream one is the directory
28/// before it. For example, if the sub-directories contained are:
30/// 'v1.5', 'v1.6', 'v1.7', 'v1.8'
32/// Then the primary one is 'v1.8', the upstream one is 'v1.7', and the rest are
33/// unused directories that can be safely deleted at any time and by any
36/// Contained within the top-level directory is a file named "lock" which is
37/// used for processes to take shared or exclusive locks for the contents of the
38/// top directory. While a \p UnifiedOnDiskCache is open it keeps a shared lock
39/// for the top-level directory; when it closes, if the primary sub-directory
40/// exceeded its limit, it attempts to get an exclusive lock in order to create
41/// a new empty primary directory; if it can't get the exclusive lock it gives
42/// up and lets the next \p UnifiedOnDiskCache instance that closes attempt
45/// The downside of this scheme is that while \p UnifiedOnDiskCache is open on a
46/// directory, by any process, the storage size in that directory will keep
47/// growing unrestricted. But the major benefit is that garbage-collection can
48/// be triggered on a directory concurrently, at any time and by any process,
49/// without affecting any active readers/writers in the same process or other
52/// The \c UnifiedOnDiskCache also provides validation and recovery on top of
53/// the underlying on-disk storage. The low-level storage is designed to remain
54/// coherent across regular process crashes, but may be invalid after power loss
55/// or similar system failures. \c UnifiedOnDiskCache::validateIfNeeded allows
56/// validating the contents once per boot and can recover by marking invalid
57/// data for garbage collection.
59/// The data recovery described above requires exclusive access to the CAS, and
60/// it is an error to attempt recovery if the CAS is open in any process/thread.
61/// In order to maximize backwards compatibility with tools that do not perform
62/// validation before opening the CAS, we do not attempt to get exclusive access
63/// until recovery is actually performed, meaning as long as the data is valid
64/// it will not conflict with concurrent use.
66//===----------------------------------------------------------------------===//
91#if __has_include(<sys/sysctl.h>)
92#include <sys/sysctl.h>
99/// FIXME: When the version of \p DBDirPrefix is bumped up we need to figure out
100/// how to handle the leftover sub-directories of the previous version, within
101/// the \p UnifiedOnDiskCache::collectGarbage function.
108 // little endian encoded.
115 // little endian encoded.
117 static_assert(ValBytes.size() ==
sizeof(
ID.getOpaqueData()));
127 std::optional<ArrayRef<char>> UpstreamValue;
128 if (
Error E = UpstreamKVDB->get(
Key).moveInto(UpstreamValue))
133 // The value is the \p ObjectID in the context of the upstream
134 // \p OnDiskGraphDB instance. Translate it to the context of the primary
135 // \p OnDiskGraphDB instance.
138 PrimaryGraphDB->getReference(UpstreamGraphDB->getDigest(UpstreamID));
140 return PrimaryID.takeError();
144/// \returns all the 'v<version>.<x>' names of sub-directories, sorted in
145/// ascending order of the integer after the dot. Corrupt directories, if
146/// included, will come first.
157 DirI.increment(EC)) {
162 FoundDBDirs.
push_back({0, std::string(SubDir)});
170 "unexpected directory " + DirI->path());
171 FoundDBDirs.
push_back({Order, std::string(SubDir)});
177 return LHS.Order <=
RHS.Order;
181 for (DBDir &Dir : FoundDBDirs)
187 auto DBDirs =
getAllDBDirs(Path,
/*IncludeCorrupt=*/true);
189 return DBDirs.takeError();
191 // FIXME: When the version of \p DBDirPrefix is bumped up we need to figure
192 // out how to handle the leftover sub-directories of the previous version.
194 for (
unsigned Keep = 2;
Keep > 0 && !DBDirs->empty(); --
Keep) {
203/// Given a sub-directory named 'v<version>.<x>', writes the
204/// 'v<version>.<x+1>' name to \p OS.
218 Args.push_back(
"-check-hash");
223 "llvm-cas-validate-stderr",
"txt", StdErrFD, StdErrPath,
228 std::optional<llvm::StringRef> Redirects[] = {
229 {
""},
// stdin = /dev/null
230 {
""},
// stdout = /dev/null
237 /*SecondsToWait=*/120,
/*MemoryLimit=*/0, &ErrMsg);
244 if (!ErrMsg.empty()) {
249 if (StdErrBuf && !(*StdErrBuf)->getBuffer().empty()) {
251 Err += (*StdErrBuf)->getBuffer();
259 unsigned HashByteSize,
bool CheckHash) {
260 std::shared_ptr<UnifiedOnDiskCache> UniDB;
266 if (
Error E = CAS->validate(CheckHash))
269 if (
Error E = Cache->validate())
275#if __has_include(<sys/sysctl.h>) && defined(KERN_BOOTTIME)
277 size_t TVLen =
sizeof(TV);
278 int KernBoot[2] = {CTL_KERN, KERN_BOOTTIME};
279 if (sysctl(KernBoot, 2, &TV, &TVLen,
nullptr, 0) < 0)
281 "failed to get boottime");
282 if (TVLen !=
sizeof(TV))
285#elif defined(__linux__)
286 // Use the mtime for /proc, which is recreated during system boot.
287 // We could also read /proc/stat and search for 'btime'.
291 return Status.getLastModificationTime().time_since_epoch().count();
299 bool CheckHash,
bool AllowRecovery,
bool ForceValidation,
300 std::optional<StringRef> LLVMCasBinaryPath) {
324 if (!Bytes.empty() &&
325 StringRef(Bytes).trim().getAsInteger(10, ValidationBootTime))
334 std::string LogValidationError;
336 if (ValidationBootTime == BootTime && !ForceValidation)
340 bool NeedsRecovery =
false;
348 NeedsRecovery =
true;
365 if (EC == std::errc::no_lock_available)
368 "CAS validation requires exclusive access but CAS was in use");
375 return DBDirs.takeError();
381 int Attempt = 0, MaxAttempts = 100;
383 for (; Attempt < MaxAttempts; ++Attempt) {
388 // Darwin uses ENOTEMPTY. Linux may return either ENOTEMPTY or EEXIST.
392 if (Attempt == MaxAttempts)
394 EC,
"rename " + PathBuf +
395 " failed: too many CAS directories awaiting pruning");
398 " failed: " + EC.message());
402 if (ValidationBootTime != BootTime) {
403 // Fix filename in case we have an error to report.
409 OS.
seek(0);
// resize does not reset position
410 OS << BootTime <<
'\n';
420 StringRef HashName,
unsigned HashByteSize,
432 // Lock the directory using a shared lock, which will prevent other processes
433 // from creating a new chain (essentially while a \p UnifiedOnDiskCache
434 // instance holds a shared lock the storage for the primary directory will
435 // grow unrestricted).
436 if (std::error_code EC =
442 return DBDirs.takeError();
448 /// If there is only one directory, open databases on it. If there are 2 or
449 /// more directories, get the most recent directories and chain them, with the
450 /// most recent being the primary one. The remaining directories are unused
451 /// data that can be garbage-collected.
452 auto UniDB = std::unique_ptr<UnifiedOnDiskCache>(
new UnifiedOnDiskCache());
453 std::unique_ptr<OnDiskGraphDB> UpstreamGraphDB;
454 std::unique_ptr<OnDiskKeyValueDB> UpstreamKVDB;
455 if (DBDirs->size() > 1) {
456 StringRef UpstreamDir = *(DBDirs->end() - 2);
460 /*UpstreamDB=*/nullptr, FaultInPolicy)
461 .moveInto(UpstreamGraphDB))
464 /*ValueName=*/"objectid",
466 .moveInto(UpstreamKVDB))
470 StringRef PrimaryDir = *(DBDirs->end() - 1);
473 std::unique_ptr<OnDiskGraphDB> PrimaryGraphDB;
475 UpstreamGraphDB.get(), FaultInPolicy)
476 .moveInto(PrimaryGraphDB))
478 std::unique_ptr<OnDiskKeyValueDB> PrimaryKVDB;
479 // \p UnifiedOnDiskCache does manual chaining for key-value requests,
480 // including an extra translation step of the value during fault-in.
483 /*ValueName=*/"objectid",
484 /*ValueSize=*/sizeof(
uint64_t), UniDB.get())
485 .moveInto(PrimaryKVDB))
488 UniDB->RootPath = RootPath;
489 UniDB->SizeLimit = SizeLimit.value_or(0);
490 UniDB->LockFD = LockFD;
491 UniDB->NeedsGarbageCollection = DBDirs->size() > 2;
492 UniDB->PrimaryDBDir = PrimaryDir;
493 UniDB->UpstreamGraphDB = std::move(UpstreamGraphDB);
494 UniDB->PrimaryGraphDB = std::move(PrimaryGraphDB);
495 UniDB->UpstreamKVDB = std::move(UpstreamKVDB);
496 UniDB->PrimaryKVDB = std::move(PrimaryKVDB);
498 return std::move(UniDB);
502 this->SizeLimit = SizeLimit.value_or(0);
506 uint64_t TotalSize = getPrimaryStorageSize();
508 TotalSize += UpstreamGraphDB->getStorageSize();
510 TotalSize += UpstreamKVDB->getStorageSize();
514uint64_t UnifiedOnDiskCache::getPrimaryStorageSize()
const {
515 return PrimaryGraphDB->getStorageSize() + PrimaryKVDB->getStorageSize();
523 // If utilization of the hard storage limit is above 85%, treat the cache as
523 // over its limit and request cleanup.
524 unsigned CurrentPercent =
525 std::max(PrimaryGraphDB->getHardStorageLimitUtilization(),
526 PrimaryKVDB->getHardStorageLimitUtilization());
527 if (CurrentPercent > 85)
530 // We allow each of the directories in the chain to reach up to half the
531 // intended size limit. Check whether the primary directory has exceeded half
532 // the limit or not, in order to decide whether we need to start a new chain.
534 // We could check the size limit against the sum of sizes of both the primary
535 // and upstream directories but then if the upstream is significantly larger
536 // than the intended limit, it would trigger a new chain to be created before
537 // the primary has reached its own limit. Essentially in such situation we
538 // prefer reclaiming the storage later in order to have more consistent cache
540 return (CurSizeLimit / 2) < getPrimaryStorageSize();
554 UpstreamKVDB.reset();
556 UpstreamGraphDB.reset();
557 PrimaryGraphDB.reset();
561 if (!ExceededSizeLimit)
564 // The primary directory exceeded its intended size limit. Try to get an
565 // exclusive lock in order to create a new primary directory for next time
566 // this \p UnifiedOnDiskCache path is opened.
576 // Managed to get an exclusive lock which means there are no other open
577 // \p UnifiedOnDiskCache instances for the same path, so we can safely start a
578 // new primary directory. To start a new primary directory we just have to
579 // create a new empty directory with the next consecutive index; since this is
580 // an atomic operation we will leave the top-level directory in a consistent
581 // state even if the process dies during this code-path.
590 NeedsGarbageCollection =
true;
/// Out-of-line defaulted default constructor. \p UnifiedOnDiskCache::open in
/// this file creates the instance with it and then assigns the members
/// (root path, size limit, lock descriptor, databases) itself.
594UnifiedOnDiskCache::UnifiedOnDiskCache() =
default;
601 return DBDirs.takeError();
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file contains the declaration of the ActionCache class, which is the base class for ActionCache ...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
This declares OnDiskGraphDB, an ondisk CAS database with a fixed length hash.
This declares OnDiskKeyValueDB, a key value storage database of fixed size key and value.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallString class.
This file defines the SmallVector class.
static constexpr StringLiteral DBDirPrefix
FIXME: When the version of DBDirPrefix is bumped up we need to figure out how to handle the leftover ...
static Error validateInProcess(StringRef RootPath, StringRef HashName, unsigned HashByteSize, bool CheckHash)
static Expected< SmallVector< std::string, 4 > > getAllGarbageDirs(StringRef Path)
static constexpr StringLiteral ValidationFilename
static constexpr StringLiteral CorruptPrefix
static void getNextDBDirName(StringRef DBDir, llvm::raw_ostream &OS)
static Error validateOutOfProcess(StringRef LLVMCasBinary, StringRef RootPath, bool CheckHash)
static Expected< uint64_t > getBootTime()
static Expected< SmallVector< std::string, 4 > > getAllDBDirs(StringRef Path, bool IncludeCorrupt=false)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
FileRemover - This class is a simple object meant to be stack allocated.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void assign(StringRef RHS)
Assign from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
bool getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM Value Representation.
static ObjectID fromOpaqueData(uint64_t Opaque)
FaultInPolicy
How to fault-in nodes if an upstream database is used.
static Expected< std::unique_ptr< OnDiskGraphDB > > open(StringRef Path, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB *UpstreamDB=nullptr, FaultInPolicy Policy=FaultInPolicy::FullTree)
Open the on-disk store from a directory.
static Expected< std::unique_ptr< OnDiskKeyValueDB > > open(StringRef Path, StringRef HashName, unsigned KeySize, StringRef ValueName, size_t ValueSize, UnifiedOnDiskCache *UnifiedCache=nullptr)
Open the on-disk store from a directory.
uint64_t getStorageSize() const
static ValueBytes getValueFromObjectID(ObjectID ID)
static Expected< std::unique_ptr< UnifiedOnDiskCache > > open(StringRef Path, std::optional< uint64_t > SizeLimit, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB::FaultInPolicy FaultInPolicy=OnDiskGraphDB::FaultInPolicy::FullTree)
Open a UnifiedOnDiskCache instance for a directory.
Error close(bool CheckSizeLimit=true)
This is called implicitly at destruction time, so it is not required for a client to call this.
static ObjectID getObjectIDFromValue(ArrayRef< char > Value)
Helper function to convert the value stored in KeyValueDB and ObjectID.
static Expected< ValidationResult > validateIfNeeded(StringRef Path, StringRef HashName, unsigned HashByteSize, bool CheckHash, bool AllowRecovery, bool ForceValidation, std::optional< StringRef > LLVMCasBinary)
Validate the data in Path, if needed to ensure correctness.
bool hasExceededSizeLimit() const
std::array< char, sizeof(uint64_t)> ValueBytes
Error collectGarbage()
Remove unused data from the current UnifiedOnDiskCache.
void setSizeLimit(std::optional< uint64_t > SizeLimit)
Set the size for limiting growth.
A raw_ostream that writes to a file descriptor.
bool has_error() const
Return the value of the flag in this raw_fd_ostream indicating whether an output error has been encou...
std::error_code error() const
uint64_t seek(uint64_t off)
Flushes the stream and repositions the underlying file descriptor position to the offset specified fr...
This class implements an extremely fast bulk output stream that can only output to a stream.
A raw_ostream that writes to an SmallVector or SmallString.
directory_iterator - Iterates through the entries in path.
Represents the result of a call to sys::fs::status().
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
std::unique_ptr< ObjectStore > createObjectStoreFromUnifiedOnDiskCache(std::shared_ptr< ondisk::UnifiedOnDiskCache > UniDB)
std::unique_ptr< ActionCache > createActionCacheFromUnifiedOnDiskCache(std::shared_ptr< ondisk::UnifiedOnDiskCache > UniDB)
std::error_code lockFileThreadSafe(int FD, llvm::sys::fs::LockKind Kind)
Thread-safe alternative to sys::fs::lockFile.
std::error_code unlockFileThreadSafe(int FD)
Thread-safe alternative to sys::fs::unlockFile.
std::error_code tryLockFileThreadSafe(int FD, std::chrono::milliseconds Timeout=std::chrono::milliseconds(0), llvm::sys::fs::LockKind Kind=llvm::sys::fs::LockKind::Exclusive)
Thread-safe alternative to sys::fs::tryLockFile.
@ Valid
The data is already valid.
@ Recovered
The data was invalid, but was recovered.
@ Skipped
Validation was skipped, as it was not needed.
uint64_t read64le(const void *P)
void write64le(void *P, uint64_t V)
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
std::error_code openFileForReadWrite(const Twine &Name, int &ResultFD, CreationDisposition Disp, OpenFlags Flags, unsigned Mode=0666)
Opens the file with the given name in a write-only or read-write mode, returning its open file descri...
LLVM_ABI std::error_code rename(const Twine &from, const Twine &to)
Rename from to to.
LLVM_ABI Error readNativeFileToEOF(file_t FileHandle, SmallVectorImpl< char > &Buffer, ssize_t ChunkSize=DefaultReadChunkSize)
Reads from FileHandle until EOF, appending to Buffer in chunks of size ChunkSize.
@ OF_Text
The file should be opened in text mode on platforms like z/OS that make this distinction.
@ CD_OpenAlways
CD_OpenAlways - When opening a file:
LLVM_ABI std::error_code create_directories(const Twine &path, bool IgnoreExisting=true, perms Perms=owner_all|group_all)
Create all the non-existent directories in path.
LLVM_ABI std::error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, int &ResultFD, SmallVectorImpl< char > &ResultPath, OpenFlags Flags=OF_None)
Create a file in the system temporary directory.
LLVM_ABI std::error_code resize_file(int FD, uint64_t Size)
Resize path to size.
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI std::error_code create_directory(const Twine &path, bool IgnoreExisting=true, perms Perms=owner_all|group_all)
Create the directory in path.
LLVM_ABI std::error_code remove_directories(const Twine &path, bool IgnoreErrors=true)
Recursively delete a directory.
LLVM_ABI StringRef get_separator(Style style=Style::native)
Return the preferred separator for this platform.
LLVM_ABI void remove_filename(SmallVectorImpl< char > &path, Style style=Style::native)
Remove the last component from path unless it is the root dir.
LLVM_ABI StringRef filename(StringRef path LLVM_LIFETIME_BOUND, Style style=Style::native)
Get filename.
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
LLVM_ABI int ExecuteAndWait(StringRef Program, ArrayRef< StringRef > Args, std::optional< ArrayRef< StringRef > > Env=std::nullopt, ArrayRef< std::optional< StringRef > > Redirects={}, unsigned SecondsToWait=0, unsigned MemoryLimit=0, std::string *ErrMsg=nullptr, bool *ExecutionFailed=nullptr, std::optional< ProcessStatistics > *ProcStat=nullptr, BitVector *AffinityMask=nullptr)
This function executes the program using the arguments provided.
This is an optimization pass for GlobalISel generic memory operations.
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
std::error_code errnoAsErrorCode()
Helper to get errno as an std::error_code.
void consumeError(Error Err)
Consume a Error without doing anything.
@ Keep
No function return thunk.