1//===----------------------------------------------------------------------===//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//===----------------------------------------------------------------------===//
10/// This file implements OnDiskGraphDB, an on-disk CAS nodes database,
11/// independent of a particular hashing algorithm. It only needs to be
12/// configured for the hash size and controls the schema of the storage.
14/// OnDiskGraphDB defines:
16/// - How the data is stored inside database, either as a standalone file, or
17/// allocated inside a datapool.
18/// - How references to other objects inside the same database are stored. They
19/// are stored as internal references, instead of full hash value to save
21/// - How to chain databases together and import objects from upstream
24/// Here's a top-level description of the current layout:
26/// - db/index.<version>: a file for the "index" table, named by \a
27/// IndexTableName and managed by \a TrieRawHashMap. The contents are 8B
28/// that are accessed atomically, describing the object kind and where/how
29/// it's stored (including an optional file offset). See \a TrieRecord for
31/// - db/data.<version>: a file for the "data" table, named by \a
32/// DataPoolTableName and managed by \a DataStore. New objects within
33/// TrieRecord::MaxEmbeddedSize are inserted here as \a
34/// TrieRecord::StorageKind::DataPool.
35/// - db/obj.<offset>.<version>: a file storing an object outside the main
36/// "data" table, named by its offset into the "index" table, with the
37/// format of \a TrieRecord::StorageKind::Standalone.
38/// - db/leaf.<offset>.<version>: a file storing a leaf node outside the
39/// main "data" table, named by its offset into the "index" table, with
40/// the format of \a TrieRecord::StorageKind::StandaloneLeaf.
41/// - db/leaf+0.<offset>.<version>: a file storing a null-terminated leaf object
42/// outside the main "data" table, named by its offset into the "index" table,
43/// with the format of \a TrieRecord::StorageKind::StandaloneLeaf0.
45//===----------------------------------------------------------------------===//
67 #define DEBUG_TYPE "on-disk-cas"
85 return ID.takeError();
88 "corrupt object '" +
toHex(*
ID) +
"'");
93/// Trie record data: 8 bytes, atomic<uint64_t>
94/// - 1-byte: StorageKind
95/// - 7-bytes: DataStoreOffset (offset into referenced file)
98 enum class StorageKind : uint8_t {
102 /// data.vX: main pool, full DataStore record.
105 /// obj.<TrieRecordOffset>.vX: standalone, with a full DataStore record.
108 /// leaf.<TrieRecordOffset>.vX: standalone, just the data. File contents
109 /// exactly the data content and file size matches the data size. No refs.
112 /// leaf+0.<TrieRecordOffset>.vX: standalone, just the data plus an
113 /// extra null character ('0円'). File size is 1 bigger than the data size.
115 StandaloneLeaf0 = 12,
118 static StringRef getStandaloneFilePrefix(StorageKind SK) {
122 case TrieRecord::StorageKind::Standalone:
124 case TrieRecord::StorageKind::StandaloneLeaf:
126 case TrieRecord::StorageKind::StandaloneLeaf0:
131 enum Limits : int64_t {
132 /// Saves files bigger than 64KB standalone instead of embedding them.
133 MaxEmbeddedSize = 64LL * 1024LL - 1,
137 StorageKind SK = StorageKind::Unknown;
141 /// Pack StorageKind and Offset from Data into 8 byte TrieRecord.
142 static uint64_t pack(Data
D) {
143 assert(
D.Offset.get() < (int64_t)(1ULL << 56));
144 uint64_t
Packed = uint64_t(
D.SK) << 56 |
D.Offset.get();
145 assert(
D.SK != StorageKind::Unknown || Packed == 0);
147 Data RoundTrip = unpack(Packed);
149 assert(
D.Offset.get() == RoundTrip.Offset.get());
154 // Unpack TrieRecord into Data.
155 static Data unpack(uint64_t Packed) {
159 D.SK = (StorageKind)(Packed >> 56);
164 TrieRecord() : Storage(0) {}
166 Data
load()
const {
return unpack(Storage); }
167 bool compare_exchange_strong(Data &Existing, Data New);
170 std::atomic<uint64_t> Storage;
173/// DataStore record data: 4B + size? + refs? + data + 0
175/// - {0,4,8}-bytes: DataSize (may be packed in Header)
176/// - {0,4,8}-bytes: NumRefs (may be packed in Header)
177/// - NumRefs*{4,8}-bytes: Refs[] (end-ptr is 8-byte aligned)
180struct DataRecordHandle {
181 /// NumRefs storage: 4B, 2B, 1B, or 0B (no refs). Or, 8B, for alignment
182 /// convenience to avoid computing padding later.
183 enum class NumRefsFlags : uint8_t {
192 /// DataSize storage: 8B, 4B, 2B, or 1B.
193 enum class DataSizeFlags {
201 /// Kind of ref stored in Refs[]: InternalRef or InternalRef4B.
202 enum class RefKindFlags {
211 DataSizeShift = NumRefsShift + NumRefsBits,
213 RefKindShift = DataSizeShift + DataSizeBits,
216 static_assert(((UINT32_MAX << NumRefsBits) & (uint32_t)NumRefsFlags::Max) ==
219 static_assert(((UINT32_MAX << DataSizeBits) & (uint32_t)DataSizeFlags::Max) ==
222 static_assert(((UINT32_MAX << RefKindBits) & (uint32_t)RefKindFlags::Max) ==
226 /// Layout of the DataRecordHandle and how to decode it.
228 NumRefsFlags NumRefs;
229 DataSizeFlags DataSize;
230 RefKindFlags RefKind;
232 static uint64_t pack(LayoutFlags LF) {
233 unsigned Packed = ((unsigned)LF.NumRefs << NumRefsShift) |
234 ((
unsigned)LF.DataSize << DataSizeShift) |
235 ((unsigned)LF.RefKind << RefKindShift);
237 LayoutFlags RoundTrip = unpack(Packed);
238 assert(LF.NumRefs == RoundTrip.NumRefs);
239 assert(LF.DataSize == RoundTrip.DataSize);
240 assert(LF.RefKind == RoundTrip.RefKind);
244 static LayoutFlags unpack(uint64_t Storage) {
245 assert(Storage <= UINT8_MAX &&
"Expect storage to fit in a byte");
248 (NumRefsFlags)((Storage >> NumRefsShift) & ((1U << NumRefsBits) - 1));
249 LF.DataSize = (DataSizeFlags)((Storage >> DataSizeShift) &
250 ((1U << DataSizeBits) - 1));
252 (RefKindFlags)((Storage >> RefKindShift) & ((1U << RefKindBits) - 1));
258 /// - 1-byte: LayoutFlags
259 /// - 1-byte: 1B size field
260 /// - {0,2}-bytes: 2B size field
262 using PackTy = uint32_t;
265 static constexpr unsigned LayoutFlagsShift =
266 (
sizeof(PackTy) - 1) * CHAR_BIT;
270 InternalRefArrayRef Refs;
274 LayoutFlags getLayoutFlags()
const {
275 return LayoutFlags::unpack(H->Packed >> Header::LayoutFlagsShift);
279 void skipDataSize(LayoutFlags LF, int64_t &RelOffset)
const;
280 uint32_t getNumRefs()
const;
281 void skipNumRefs(LayoutFlags LF, int64_t &RelOffset)
const;
282 int64_t getRefsRelOffset()
const;
283 int64_t getDataRelOffset()
const;
285 static uint64_t getTotalSize(uint64_t DataRelOffset, uint64_t
DataSize) {
286 return DataRelOffset +
DataSize + 1;
288 uint64_t getTotalSize()
const {
292 /// Describe the layout of data stored and how to decode from
293 /// DataRecordHandle.
295 explicit Layout(
const Input &
I);
298 uint64_t DataSize = 0;
299 uint32_t NumRefs = 0;
300 int64_t RefsRelOffset = 0;
301 int64_t DataRelOffset = 0;
302 uint64_t getTotalSize()
const {
303 return DataRecordHandle::getTotalSize(DataRelOffset, DataSize);
307 InternalRefArrayRef getRefs()
const {
308 assert(H &&
"Expected valid handle");
309 auto *BeginByte =
reinterpret_cast<const char *
>(H) + getRefsRelOffset();
310 size_t Size = getNumRefs();
312 return InternalRefArrayRef();
313 if (getLayoutFlags().RefKind == RefKindFlags::InternalRef4B)
314 return ArrayRef(
reinterpret_cast<const InternalRef4B *
>(BeginByte),
Size);
315 return ArrayRef(
reinterpret_cast<const InternalRef *
>(BeginByte),
Size);
318 ArrayRef<char> getData()
const {
319 assert(H &&
"Expected valid handle");
320 return ArrayRef(
reinterpret_cast<const char *
>(H) + getDataRelOffset(),
324 static DataRecordHandle create(function_ref<
char *(
size_t Size)>
Alloc,
326 static Expected<DataRecordHandle>
327 createWithError(function_ref<Expected<char *>(
size_t Size)>
Alloc,
329 static DataRecordHandle construct(
char *Mem,
const Input &
I);
331 static DataRecordHandle
get(
const char *Mem) {
332 return DataRecordHandle(
333 *
reinterpret_cast<const DataRecordHandle::Header *
>(Mem));
335 static Expected<DataRecordHandle>
336 getFromDataPool(
const OnDiskDataAllocator &Pool, FileOffset
Offset);
338 explicit operator bool()
const {
return H; }
339 const Header &getHeader()
const {
return *H; }
341 DataRecordHandle() =
default;
342 explicit DataRecordHandle(
const Header &H) : H(&H) {}
345 static DataRecordHandle constructImpl(
char *Mem,
const Input &
I,
347 const Header *H =
nullptr;
350/// Proxy for any on-disk object or raw data.
351struct OnDiskContent {
352 std::optional<DataRecordHandle> Record;
353 std::optional<ArrayRef<char>> Bytes;
356/// Data loaded inside the memory from standalone file.
357class StandaloneDataInMemory {
359 OnDiskContent getContent()
const;
361 StandaloneDataInMemory(std::unique_ptr<sys::fs::mapped_file_region> Region,
362 TrieRecord::StorageKind SK)
363 : Region(std::
move(Region)), SK(SK) {
365 bool IsStandalone =
false;
367 case TrieRecord::StorageKind::Standalone:
368 case TrieRecord::StorageKind::StandaloneLeaf:
369 case TrieRecord::StorageKind::StandaloneLeaf0:
380 std::unique_ptr<sys::fs::mapped_file_region> Region;
381 TrieRecord::StorageKind SK;
384/// Container to lookup loaded standalone objects.
385template <
size_t NumShards>
class StandaloneDataMap {
386 static_assert(
isPowerOf2_64(NumShards),
"Expected power of 2");
389 uintptr_t insert(ArrayRef<uint8_t> Hash, TrieRecord::StorageKind SK,
390 std::unique_ptr<sys::fs::mapped_file_region> Region);
392 const StandaloneDataInMemory *
lookup(ArrayRef<uint8_t> Hash)
const;
393 bool count(ArrayRef<uint8_t> Hash)
const {
return bool(
lookup(Hash)); }
397 /// Needs to store a std::unique_ptr for a stable address identity.
398 DenseMap<const uint8_t *, std::unique_ptr<StandaloneDataInMemory>> Map;
399 mutable std::mutex Mutex;
401 Shard &getShard(ArrayRef<uint8_t> Hash) {
402 return const_cast<Shard &
>(
403 const_cast<const StandaloneDataMap *
>(
this)->getShard(Hash));
405 const Shard &getShard(ArrayRef<uint8_t> Hash)
const {
406 static_assert(NumShards <= 256,
"Expected only 8 bits of shard");
407 return Shards[Hash[0] % NumShards];
410 Shard Shards[NumShards];
413using StandaloneDataMapTy = StandaloneDataMap<16>;
415/// A vector of internal node references.
416class InternalRefVector {
418 void push_back(InternalRef
Ref) {
420 return FullRefs.push_back(
Ref);
422 return SmallRefs.push_back(*Small);
425 FullRefs.reserve(SmallRefs.size() + 1);
426 for (InternalRef4B Small : SmallRefs)
427 FullRefs.push_back(Small);
428 FullRefs.push_back(
Ref);
432 operator InternalRefArrayRef()
const {
433 assert(SmallRefs.empty() || FullRefs.empty());
434 return NeedsFull ? InternalRefArrayRef(FullRefs)
435 : InternalRefArrayRef(SmallRefs);
439 bool NeedsFull =
false;
449 if (Expected<char *> Mem =
Alloc(
L.getTotalSize()))
450 return constructImpl(*Mem,
I, L);
452 return Mem.takeError();
456DataRecordHandle::create(function_ref<
char *(
size_t Size)>
Alloc,
459 return constructImpl(
Alloc(
L.getTotalSize()),
I, L);
463 // Store the file offset as it is.
469 // Store the pointer from memory with lowest bit set.
474/// Proxy for an on-disk index record.
482uintptr_t StandaloneDataMap<N>::insert(
484 std::unique_ptr<sys::fs::mapped_file_region>
Region) {
485 auto &S = getShard(Hash);
486 std::lock_guard<std::mutex> Lock(S.Mutex);
487 auto &V = S.Map[Hash.
data()];
489 V = std::make_unique<StandaloneDataInMemory>(std::move(
Region), SK);
490 return reinterpret_cast<uintptr_t
>(V.get());
494const StandaloneDataInMemory *
496 auto &S = getShard(Hash);
497 std::lock_guard<std::mutex> Lock(S.Mutex);
498 auto I = S.Map.find(Hash.
data());
499 if (
I == S.Map.end())
506/// Copy of \a sys::fs::TempFile that skips RemoveOnSignal, which is too
507/// expensive to register/unregister at this rate.
509/// FIXME: Add a TempFileManager that maintains a thread-safe list of open temp
510/// files and has a signal handler registered that removes them all.
513 TempFile(
StringRef Name,
int FD) : TmpName(
std::string(Name)), FD(FD) {}
516 /// This creates a temporary file with createUniqueFile.
518 TempFile(TempFile &&
Other) { *
this = std::move(
Other); }
519 TempFile &operator=(TempFile &&
Other) {
520 TmpName = std::move(
Other.TmpName);
527 // Name of the temporary file.
530 // The open file descriptor.
533 // Keep this with the given name.
534 Error keep(
const Twine &Name);
537 // This checks that keep or delete was called.
541class MappedTempFile {
543 char *
data()
const {
return Map.
data(); }
544 size_t size()
const {
return Map.
size(); }
547 assert(Map &&
"Map already destroyed");
549 return Temp.discard();
552 Error keep(
const Twine &Name) {
553 assert(Map &&
"Map already destroyed");
555 return Temp.keep(Name);
558 MappedTempFile(TempFile Temp, sys::fs::mapped_file_region Map)
563 sys::fs::mapped_file_region Map;
576 // Always try to close and remove.
577 std::error_code RemoveEC;
591 // Always try to close and rename.
611 TempFile Ret(ResultPath,
FD);
612 return std::move(Ret);
615bool TrieRecord::compare_exchange_strong(
Data &Existing,
Data New) {
616 uint64_t ExistingPacked = pack(Existing);
618 if (Storage.compare_exchange_strong(ExistingPacked, NewPacked))
620 Existing = unpack(ExistingPacked);
624DataRecordHandle DataRecordHandle::construct(
char *Mem,
const Input &
I) {
625 return constructImpl(Mem,
I, Layout(
I));
631 auto HeaderData = Pool.
get(
Offset,
sizeof(DataRecordHandle::Header));
633 return HeaderData.takeError();
635 auto Record = DataRecordHandle::get(HeaderData->data());
639 "data record span passed the end of the data pool");
644DataRecordHandle DataRecordHandle::constructImpl(
char *Mem,
const Input &
I,
646 char *
Next = Mem +
sizeof(Header);
648 // Fill in Packed and set other data, then come back to construct the header.
649 Header::PackTy Packed = 0;
650 Packed |= LayoutFlags::pack(L.Flags) << Header::LayoutFlagsShift;
652 // Construct DataSize.
653 switch (L.Flags.DataSize) {
654 case DataSizeFlags::Uses1B:
655 assert(
I.Data.size() <= UINT8_MAX);
656 Packed |= (Header::PackTy)
I.Data.size()
657 << ((
sizeof(Packed) - 2) * CHAR_BIT);
659 case DataSizeFlags::Uses2B:
660 assert(
I.Data.size() <= UINT16_MAX);
661 Packed |= (Header::PackTy)
I.Data.size()
662 << ((
sizeof(Packed) - 4) * CHAR_BIT);
664 case DataSizeFlags::Uses4B:
668 case DataSizeFlags::Uses8B:
674 // Construct NumRefs.
676 // NOTE: May be writing NumRefs even if there are zero refs in order to fix
678 switch (
L.Flags.NumRefs) {
679 case NumRefsFlags::Uses0B:
681 case NumRefsFlags::Uses1B:
682 assert(
I.Refs.size() <= UINT8_MAX);
683 Packed |= (Header::PackTy)
I.Refs.size()
684 << ((
sizeof(
Packed) - 2) * CHAR_BIT);
686 case NumRefsFlags::Uses2B:
687 assert(
I.Refs.size() <= UINT16_MAX);
688 Packed |= (Header::PackTy)
I.Refs.size()
689 << ((
sizeof(
Packed) - 4) * CHAR_BIT);
691 case NumRefsFlags::Uses4B:
695 case NumRefsFlags::Uses8B:
702 if (!
I.Refs.empty()) {
703 assert((
L.Flags.RefKind == RefKindFlags::InternalRef4B) ==
I.Refs.is4B());
704 ArrayRef<uint8_t> RefsBuffer =
I.Refs.getBuffer();
709 // Construct Data and the trailing null.
712 Next[
I.Data.size()] = 0;
714 // Construct the header itself and return.
715 Header *
H =
new (Mem) Header{
Packed};
716 DataRecordHandle Record(*
H);
717 assert(Record.getData() ==
I.Data);
718 assert(Record.getNumRefs() ==
I.Refs.size());
719 assert(Record.getRefs() ==
I.Refs);
720 assert(Record.getLayoutFlags().DataSize ==
L.Flags.DataSize);
721 assert(Record.getLayoutFlags().NumRefs ==
L.Flags.NumRefs);
722 assert(Record.getLayoutFlags().RefKind ==
L.Flags.RefKind);
726DataRecordHandle::Layout::Layout(
const Input &
I) {
727 // Start initial relative offsets right after the Header.
728 uint64_t RelOffset =
sizeof(Header);
730 // Initialize the easy stuff.
732 NumRefs =
I.Refs.size();
736 I.Refs.is4B() ? RefKindFlags::InternalRef4B : RefKindFlags::InternalRef;
738 // Find the smallest slot available for DataSize.
741 if (
DataSize <= UINT8_MAX && Has1B) {
742 Flags.DataSize = DataSizeFlags::Uses1B;
744 }
else if (
DataSize <= UINT16_MAX && Has2B) {
745 Flags.DataSize = DataSizeFlags::Uses2B;
747 }
else if (
DataSize <= UINT32_MAX) {
748 Flags.DataSize = DataSizeFlags::Uses4B;
751 Flags.DataSize = DataSizeFlags::Uses8B;
755 // Find the smallest slot available for NumRefs. Never sets NumRefs8B here.
757 Flags.NumRefs = NumRefsFlags::Uses0B;
758 }
else if (NumRefs <= UINT8_MAX && Has1B) {
759 Flags.NumRefs = NumRefsFlags::Uses1B;
761 }
else if (NumRefs <= UINT16_MAX && Has2B) {
762 Flags.NumRefs = NumRefsFlags::Uses2B;
765 Flags.NumRefs = NumRefsFlags::Uses4B;
769 // Helper to "upgrade" either DataSize or NumRefs by 4B to avoid complicated
770 // padding rules when reading and writing. This also bumps RelOffset.
772 // The value for NumRefs is strictly limited to UINT32_MAX, but it can be
773 // stored as 8B. This means we can *always* find a size to grow.
775 // NOTE: Only call this once.
776 auto GrowSizeFieldsBy4B = [&]() {
780 assert(Flags.NumRefs != NumRefsFlags::Uses8B &&
781 "Expected to be able to grow NumRefs8B");
783 // First try to grow DataSize. NumRefs will not (yet) be 8B, and if
784 // DataSize is upgraded to 8B it'll already be aligned.
786 // Failing that, grow NumRefs.
787 if (Flags.DataSize < DataSizeFlags::Uses4B)
788 Flags.DataSize = DataSizeFlags::Uses4B;
// DataSize: Packed => 4B.
789 else if (Flags.DataSize < DataSizeFlags::Uses8B)
790 Flags.DataSize = DataSizeFlags::Uses8B;
// DataSize: 4B => 8B.
791 else if (Flags.NumRefs < NumRefsFlags::Uses4B)
792 Flags.NumRefs = NumRefsFlags::Uses4B;
// NumRefs: Packed => 4B.
794 Flags.NumRefs = NumRefsFlags::Uses8B;
// NumRefs: 4B => 8B.
798 if (Flags.RefKind == RefKindFlags::InternalRef) {
799 // List of 8B refs should be 8B-aligned. Grow one of the sizes to get this
802 GrowSizeFieldsBy4B();
805 RefsRelOffset = RelOffset;
806 RelOffset += 8 * NumRefs;
808 // The array of 4B refs doesn't need 8B alignment, but the data will need
809 // to be 8B-aligned. Detect this now, and, if necessary, shift everything
810 // by 4B by growing one of the sizes.
811 // If we remove the need for 8B-alignment for data there is <1% savings in
812 // disk storage for a clang build using MCCAS but the 8B-alignment may be
813 // useful in the future so keep it for now.
814 uint64_t RefListSize = 4 * NumRefs;
816 GrowSizeFieldsBy4B();
817 RefsRelOffset = RelOffset;
818 RelOffset += RefListSize;
822 DataRelOffset = RelOffset;
825uint64_t DataRecordHandle::getDataSize()
const {
826 int64_t RelOffset =
sizeof(Header);
827 auto *DataSizePtr =
reinterpret_cast<const char *
>(
H) + RelOffset;
828 switch (getLayoutFlags().
DataSize) {
829 case DataSizeFlags::Uses1B:
830 return (
H->Packed >> ((
sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
831 case DataSizeFlags::Uses2B:
832 return (
H->Packed >> ((
sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
834 case DataSizeFlags::Uses4B:
836 case DataSizeFlags::Uses8B:
842void DataRecordHandle::skipDataSize(LayoutFlags LF, int64_t &RelOffset)
const {
843 if (LF.DataSize >= DataSizeFlags::Uses4B)
845 if (LF.DataSize >= DataSizeFlags::Uses8B)
849uint32_t DataRecordHandle::getNumRefs()
const {
850 LayoutFlags LF = getLayoutFlags();
851 int64_t RelOffset =
sizeof(Header);
852 skipDataSize(LF, RelOffset);
853 auto *NumRefsPtr =
reinterpret_cast<const char *
>(
H) + RelOffset;
854 switch (LF.NumRefs) {
855 case NumRefsFlags::Uses0B:
857 case NumRefsFlags::Uses1B:
858 return (
H->Packed >> ((
sizeof(Header::PackTy) - 2) * CHAR_BIT)) & UINT8_MAX;
859 case NumRefsFlags::Uses2B:
860 return (
H->Packed >> ((
sizeof(Header::PackTy) - 4) * CHAR_BIT)) &
862 case NumRefsFlags::Uses4B:
864 case NumRefsFlags::Uses8B:
870void DataRecordHandle::skipNumRefs(LayoutFlags LF, int64_t &RelOffset)
const {
871 if (LF.NumRefs >= NumRefsFlags::Uses4B)
873 if (LF.NumRefs >= NumRefsFlags::Uses8B)
877int64_t DataRecordHandle::getRefsRelOffset()
const {
878 LayoutFlags LF = getLayoutFlags();
879 int64_t RelOffset =
sizeof(Header);
880 skipDataSize(LF, RelOffset);
881 skipNumRefs(LF, RelOffset);
885int64_t DataRecordHandle::getDataRelOffset()
const {
886 LayoutFlags LF = getLayoutFlags();
887 int64_t RelOffset =
sizeof(Header);
888 skipDataSize(LF, RelOffset);
889 skipNumRefs(LF, RelOffset);
890 uint32_t RefSize = LF.RefKind == RefKindFlags::InternalRef4B ? 4 : 8;
891 RelOffset += RefSize * getNumRefs();
897 if (
auto E = UpstreamDB->validate(Deep, Hasher))
903 auto formatError = [&](
Twine Msg) {
911 if (
Record.Data.size() !=
sizeof(TrieRecord))
912 return formatError(
"wrong data record size");
914 return formatError(
"wrong data record alignment");
916 auto *R =
reinterpret_cast<const TrieRecord *
>(
Record.Data.data());
917 TrieRecord::Data
D = R->load();
918 std::unique_ptr<MemoryBuffer> FileBuffer;
924 return formatError(
"invalid record kind value");
927 auto I = getIndexProxyFromRef(
Ref);
929 return I.takeError();
932 case TrieRecord::StorageKind::Unknown:
933 // This could be an abandoned entry due to a termination before updating
934 // the record. It can be reused by later insertion so just skip this entry
937 case TrieRecord::StorageKind::DataPool:
938    // Check offset is a positive value, and large enough to hold the
939 // header for the data record.
940 if (
D.Offset.get() <= 0 ||
941 (
uint64_t)
D.Offset.get() +
sizeof(DataRecordHandle::Header) >=
943 return formatError(
"datapool record out of bound");
945 case TrieRecord::StorageKind::Standalone:
946 case TrieRecord::StorageKind::StandaloneLeaf:
947 case TrieRecord::StorageKind::StandaloneLeaf0:
949 getStandalonePath(TrieRecord::getStandaloneFilePrefix(
D.SK), *
I, Path);
950 // If need to validate the content of the file later, just load the
951      // buffer here. Otherwise, just check the existence of the file.
954 /*RequiresNullTerminator=*/false);
956 return formatError(
"record file \'" + Path +
"\' does not exist");
958 FileBuffer = std::move(*File);
960 return formatError(
"record file \'" + Path +
"\' does not exist");
966 auto dataError = [&](
Twine Msg) {
968 "bad data for digest \'" +
toHex(
I->Hash) +
975 case TrieRecord::StorageKind::Unknown:
977 case TrieRecord::StorageKind::DataPool: {
978 auto DataRecord = DataRecordHandle::getFromDataPool(DataPool,
D.Offset);
980 return dataError(
toString(DataRecord.takeError()));
982 for (
auto InternRef : DataRecord->getRefs()) {
983 auto Index = getIndexProxyFromRef(InternRef);
985 return Index.takeError();
988 StoredData = DataRecord->getData();
991 case TrieRecord::StorageKind::Standalone: {
992 if (FileBuffer->getBufferSize() <
sizeof(DataRecordHandle::Header))
993 return dataError(
"data record is not big enough to read the header");
994 auto DataRecord = DataRecordHandle::get(FileBuffer->getBufferStart());
995 if (DataRecord.getTotalSize() < FileBuffer->getBufferSize())
997 "data record span passed the end of the standalone file");
998 for (
auto InternRef : DataRecord.getRefs()) {
999 auto Index = getIndexProxyFromRef(InternRef);
1001 return Index.takeError();
1004 StoredData = DataRecord.getData();
1007 case TrieRecord::StorageKind::StandaloneLeaf:
1008 case TrieRecord::StorageKind::StandaloneLeaf0: {
1010 if (
D.SK == TrieRecord::StorageKind::StandaloneLeaf0) {
1011 if (!FileBuffer->getBuffer().ends_with(
'0円'))
1012 return dataError(
"standalone file is not zero terminated");
1020 Hasher(Refs, StoredData, ComputedHash);
1022 return dataError(
"hash mismatch, got \'" +
toHex(ComputedHash) +
1030 OS <<
"on-disk-root-path: " << RootPath <<
"\n";
1042 auto *R =
reinterpret_cast<const TrieRecord *
>(
Data.data());
1043 TrieRecord::Data
D = R->load();
1046 case TrieRecord::StorageKind::Unknown:
1049 case TrieRecord::StorageKind::DataPool:
1053 case TrieRecord::StorageKind::Standalone:
1054 OS <<
"standalone-data ";
1056 case TrieRecord::StorageKind::StandaloneLeaf:
1057 OS <<
"standalone-leaf ";
1059 case TrieRecord::StorageKind::StandaloneLeaf0:
1060 OS <<
"standalone-leaf+0";
1063 OS <<
" Offset=" << (
void *)
D.Offset.get();
1071 Pool, [](PoolInfo LHS, PoolInfo RHS) {
return LHS.Offset < RHS.Offset; });
1072 for (PoolInfo PI : Pool) {
1073 OS <<
"- addr=" << (
void *)PI.Offset <<
" ";
1074 auto D = DataRecordHandle::getFromDataPool(DataPool,
FileOffset(PI.Offset));
1076 OS <<
"error: " <<
toString(
D.takeError());
1080 OS <<
"record refs=" <<
D->getNumRefs() <<
" data=" <<
D->getDataSize()
1081 <<
" size=" <<
D->getTotalSize()
1082 <<
" end=" << (
void *)(PI.Offset +
D->getTotalSize()) <<
"\n";
1088 auto P = Index.insertLazy(
1094 new (TentativeValue.
Data.
data()) TrieRecord();
1097 return P.takeError();
1099 assert(*
P &&
"Expected insertion");
1100 return getIndexProxyFromPointer(*
P);
1107 return IndexProxy{
P.getOffset(),
P->Hash,
1108 *
const_cast<TrieRecord *
>(
1109 reinterpret_cast<const TrieRecord *
>(
P->Data.data()))};
1113 auto I = indexHash(Hash);
1115 return I.takeError();
1116 return getExternalReference(*
I);
1119ObjectID OnDiskGraphDB::getExternalReference(
const IndexProxy &
I) {
1120 return getExternalReference(makeInternalRef(
I.Offset));
1123std::optional<ObjectID>
1126 [&](std::optional<IndexProxy>
I) -> std::optional<ObjectID> {
1128 return std::nullopt;
1129 std::optional<ObjectID> UpstreamID =
1130 UpstreamDB->getExistingReference(Digest);
1132 return std::nullopt;
1135 return std::nullopt;
1138 return getExternalReference(*
I);
1143 return tryUpstream(std::nullopt);
1145 TrieRecord::Data Obj =
I.Ref.load();
1146 if (Obj.SK == TrieRecord::StorageKind::Unknown)
1147 return tryUpstream(
I);
1148 return getExternalReference(makeInternalRef(
I.Offset));
1153 auto P = Index.recoverFromFileOffset(
Ref.getFileOffset());
1155 return P.takeError();
1156 return getIndexProxyFromPointer(*
P);
1160 auto I = getIndexProxyFromRef(
Ref);
1162 return I.takeError();
1172 // Decode ObjectHandle to locate the stored content.
1176 reinterpret_cast<const StandaloneDataInMemory *
>(
Data & (-1ULL << 1));
1177 return SDIM->getContent();
1182 assert(DataHandle.getData().end()[0] == 0 &&
"Null termination");
1183 return OnDiskContent{DataHandle, std::nullopt};
1189 return *Content.Bytes;
1190 assert(Content.Record &&
"Expected record or bytes");
1191 return Content.Record->getData();
1195 if (std::optional<DataRecordHandle>
Record =
1197 return Record->getRefs();
1198 return std::nullopt;
1204 auto I = getIndexProxyFromRef(
Ref);
1206 return I.takeError();
1207 TrieRecord::Data Object =
I->Ref.load();
1209 if (Object.SK == TrieRecord::StorageKind::Unknown)
1210 return faultInFromUpstream(ExternalRef);
1212 if (Object.SK == TrieRecord::StorageKind::DataPool)
1215 // Only TrieRecord::StorageKind::Standalone (and variants) need to be
1216 // explicitly loaded.
1218 // There's corruption if standalone objects have offsets, or if we get here
1219 // for something that isn't standalone.
1222 switch (Object.SK) {
1223 case TrieRecord::StorageKind::Unknown:
1224 case TrieRecord::StorageKind::DataPool:
1226 case TrieRecord::StorageKind::Standalone:
1227 case TrieRecord::StorageKind::StandaloneLeaf0:
1228 case TrieRecord::StorageKind::StandaloneLeaf:
1232 // Load it from disk.
1234 // Note: Creation logic guarantees that data that needs null-termination is
1235 // suitably 0-padded. Requiring null-termination here would be too expensive
1236 // for extremely large objects that happen to be page-aligned.
1238 getStandalonePath(TrieRecord::getStandaloneFilePrefix(Object.SK), *
I, Path);
1251 auto Region = std::make_unique<sys::fs::mapped_file_region>(
1257 static_cast<StandaloneDataMapTy *
>(StandaloneData)
1258 ->insert(
I->Hash, Object.SK, std::move(
Region)));
1262 auto Presence = getObjectPresence(
Ref,
/*CheckUpstream=*/true);
1264 return Presence.takeError();
1266 switch (*Presence) {
1267 case ObjectPresence::Missing:
1269 case ObjectPresence::InPrimaryDB:
1271 case ObjectPresence::OnlyInUpstreamDB:
1272 if (
auto FaultInResult = faultInFromUpstream(
Ref); !FaultInResult)
1273 return FaultInResult.takeError();
1280OnDiskGraphDB::getObjectPresence(
ObjectID ExternalRef,
1281 bool CheckUpstream)
const {
1283 auto I = getIndexProxyFromRef(
Ref);
1285 return I.takeError();
1287 TrieRecord::Data Object =
I->Ref.load();
1288 if (Object.SK != TrieRecord::StorageKind::Unknown)
1289 return ObjectPresence::InPrimaryDB;
1291 if (!CheckUpstream || !UpstreamDB)
1292 return ObjectPresence::Missing;
1294 std::optional<ObjectID> UpstreamID =
1295 UpstreamDB->getExistingReference(getDigest(*
I));
1296 return UpstreamID.has_value() ? ObjectPresence::OnlyInUpstreamDB
1297 : ObjectPresence::Missing;
1304void OnDiskGraphDB::getStandalonePath(
StringRef Prefix,
const IndexProxy &
I,
1306 Path.assign(RootPath.begin(), RootPath.end());
1311OnDiskContent StandaloneDataInMemory::getContent()
const {
1317 case TrieRecord::StorageKind::Standalone:
1319 case TrieRecord::StorageKind::StandaloneLeaf0:
1320 Leaf = Leaf0 =
true;
1322 case TrieRecord::StorageKind::StandaloneLeaf:
1329 assert(
Data.drop_back(Leaf0).end()[0] == 0 &&
1330 "Standalone node data missing null termination");
1331 return OnDiskContent{std::nullopt,
1335 DataRecordHandle Record = DataRecordHandle::get(
Region->data());
1336 assert(Record.getData().end()[0] == 0 &&
1337 "Standalone object record missing null termination for data");
1338 return OnDiskContent{Record, std::nullopt};
1343 assert(
Size &&
"Unexpected request for an empty temp file");
1346 return File.takeError();
1360 return MappedTempFile(std::move(*File), std::move(Map));
1368Error OnDiskGraphDB::createStandaloneLeaf(IndexProxy &
I, ArrayRef<char>
Data) {
1369 assert(
Data.size() > TrieRecord::MaxEmbeddedSize &&
1370 "Expected a bigger file for external content...");
1373 TrieRecord::StorageKind SK = Leaf0 ? TrieRecord::StorageKind::StandaloneLeaf0
1374 : TrieRecord::StorageKind::StandaloneLeaf;
1376 SmallString<256>
Path;
1377 int64_t FileSize =
Data.size() + Leaf0;
1378 getStandalonePath(TrieRecord::getStandaloneFilePrefix(SK),
I, Path);
1380 // Write the file. Don't reuse this mapped_file_region, which is read/write.
1381 // Let load() pull up one that's read-only.
1384 return File.takeError();
1393 // Store the object reference.
1394 TrieRecord::Data Existing;
1396 TrieRecord::Data Leaf{SK, FileOffset()};
1397 if (
I.Ref.compare_exchange_strong(Existing, Leaf)) {
1398 recordStandaloneSizeIncrease(FileSize);
1403 // If there was a race, confirm that the new value has valid storage.
1404 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1412 auto I = getIndexProxyFromRef(getInternalRef(
ID));
1414 return I.takeError();
1416 // Early return in case the node exists.
1418 TrieRecord::Data Existing =
I->Ref.load();
1419 if (Existing.SK != TrieRecord::StorageKind::Unknown)
1424 if (Refs.
empty() &&
Data.size() > TrieRecord::MaxEmbeddedSize)
1425 return createStandaloneLeaf(*
I,
Data);
1427 // TODO: Check whether it's worth checking the index for an already existing
1428 // object (like storeTreeImpl() does) before building up the
1429 // InternalRefVector.
1430 InternalRefVector InternalRefs;
1432 InternalRefs.push_back(getInternalRef(
Ref));
1434 // Create the object.
1436 DataRecordHandle::Input
Input{InternalRefs,
Data};
1438 // Compute the storage kind, allocate it, and create the record.
1439 TrieRecord::StorageKind SK = TrieRecord::StorageKind::Unknown;
1442 std::optional<MappedTempFile> File;
1443 std::optional<uint64_t> FileSize;
1445 getStandalonePath(TrieRecord::getStandaloneFilePrefix(
1446 TrieRecord::StorageKind::Standalone),
1449 return std::move(E);
1452 SK = TrieRecord::StorageKind::Standalone;
1453 return File->data();
1456 if (
Size <= TrieRecord::MaxEmbeddedSize) {
1457 SK = TrieRecord::StorageKind::DataPool;
1458 auto P = DataPool.allocate(
Size);
1460 char *NewAlloc =
nullptr;
1462 P.takeError(), [&](std::unique_ptr<StringError> E) ->
Error {
1463 if (E->convertToErrorCode() == std::errc::not_enough_memory)
1464 return AllocStandaloneFile(Size).moveInto(NewAlloc);
1465 return Error(std::move(E));
1469 return std::move(NewE);
1471 PoolOffset =
P->getOffset();
1473 dbgs() <<
"pool-alloc addr=" << (
void *)PoolOffset.
get()
1475 <<
" end=" << (
void *)(PoolOffset.
get() +
Size) <<
"\n";
1477 return (*P)->data();
1479 return AllocStandaloneFile(
Size);
1486 assert(
Record.getData().end()[0] == 0 &&
"Expected null-termination");
1488 assert(SK != TrieRecord::StorageKind::Unknown);
1489 assert(
bool(File) !=
bool(PoolOffset) &&
1490 "Expected either a mapped file or a pooled offset");
1492 // Check for a race before calling MappedTempFile::keep().
1494 // Then decide what to do with the file. Better to discard than overwrite if
1495 // another thread/process has already added this.
1496 TrieRecord::Data Existing =
I->Ref.load();
1498 TrieRecord::Data NewObject{SK, PoolOffset};
1500 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1502 if (
Error E = File->keep(Path))
1509 // If we didn't already see a racing/existing write, then try storing the
1510 // new object. If that races, confirm that the new value has valid storage.
1512 // TODO: Find a way to reuse the storage from the new-but-abandoned record
1514 if (Existing.SK == TrieRecord::StorageKind::Unknown) {
1515 if (
I->Ref.compare_exchange_strong(Existing, NewObject)) {
1517 recordStandaloneSizeIncrease(*FileSize);
1523 if (Existing.SK == TrieRecord::StorageKind::Unknown)
1526 // Load existing object.
1530void OnDiskGraphDB::recordStandaloneSizeIncrease(
size_t SizeIncrease) {
1531 standaloneStorageSize().fetch_add(SizeIncrease, std::memory_order_relaxed);
1534std::atomic<uint64_t> &OnDiskGraphDB::standaloneStorageSize()
const {
1536 assert(UserHeader.
size() ==
sizeof(std::atomic<uint64_t>));
1538 return *
reinterpret_cast<std::atomic<uint64_t> *
>(UserHeader.
data());
1541uint64_t OnDiskGraphDB::getStandaloneStorageSize()
const {
1542 return standaloneStorageSize().load(std::memory_order_relaxed);
1546 return Index.size() + DataPool.size() + getStandaloneStorageSize();
1550 unsigned IndexPercent = Index.size() * 100ULL / Index.capacity();
1551 unsigned DataPercent = DataPool.size() * 100ULL / DataPool.capacity();
1552 return std::max(IndexPercent, DataPercent);
1557 unsigned HashByteSize, OnDiskGraphDB *UpstreamDB,
1562 constexpr uint64_t MB = 1024ull * 1024ull;
1563 constexpr uint64_t GB = 1024ull * 1024ull * 1024ull;
1566 uint64_t MaxDataPoolSize = 24 * GB;
1569 MaxIndexSize = 1 * GB;
1570 MaxDataPoolSize = 2 * GB;
1575 return CustomSize.takeError();
1577 MaxIndexSize = MaxDataPoolSize = **CustomSize;
1581 std::optional<OnDiskTrieRawHashMap> Index;
1584 HashByteSize * CHAR_BIT,
1585 /*DataSize=*/sizeof(TrieRecord), MaxIndexSize,
1588 return std::move(E);
1590 uint32_t UserHeaderSize =
sizeof(std::atomic<uint64_t>);
1594 std::optional<OnDiskDataAllocator> DataPool;
1600 MaxDataPoolSize,
/*MinFileSize=*/MB, UserHeaderSize,
1601 [](
void *UserHeaderPtr) {
1602 new (UserHeaderPtr) std::atomic<uint64_t>(0);
1604 .moveInto(DataPool))
1605 return std::move(E);
1606 if (DataPool->getUserHeader().size() != UserHeaderSize)
1608 "unexpected user header in '" + DataPoolPath +
1611 return std::unique_ptr<OnDiskGraphDB>(
new OnDiskGraphDB(
1612 AbsPath, std::move(*Index), std::move(*DataPool), UpstreamDB, Policy));
1619 RootPath(RootPath.str()), UpstreamDB(UpstreamDB), FIPolicy(Policy) {
1620 /// Lifetime for "big" objects not in DataPool.
1622 /// NOTE: Could use ThreadSafeTrieRawHashMap here. For now, doing something
1623 /// simpler on the assumption there won't be much contention since most data
1624 /// is not big. If there is contention, and we've already fixed ObjectProxy
1625 /// object handles to be cheap enough to use consistently, the fix might be
1626 /// to use better use of them rather than optimizing this map.
1628 /// FIXME: Figure out the right number of shards, if any.
1629 StandaloneData =
new StandaloneDataMapTy();
1633 delete static_cast<StandaloneDataMapTy *
>(StandaloneData);
1638 // Copies the full CAS tree from upstream. Uses depth-first copying to protect
1639 // against the process dying during importing and leaving the database with an
1640 // incomplete tree. Note that if the upstream has missing nodes then the tree
1641 // will be copied with missing nodes as well, it won't be considered an error.
1642 struct UpstreamCursor {
1648 /// Keeps track of the state of visitation for current node and all of its
1651 /// Keeps track of the currently visited nodes as they are imported into
1652 /// primary database, from current node and its parents. When a node is
1653 /// entered for visitation it appends its own ID, then appends referenced IDs
1654 /// as they get imported. When a node is fully imported it removes the
1655 /// referenced IDs from the bottom of the stack which leaves its own ID at the
1656 /// bottom, adding to the list of referenced IDs for the parent node.
1659 auto enqueueNode = [&](
ObjectID PrimaryID, std::optional<ObjectHandle>
Node) {
1665 (size_t)std::distance(Refs.begin(), Refs.end()),
1666 Refs.begin(), Refs.end()});
1669 enqueueNode(PrimaryID, UpstreamNode);
1671 while (!CursorStack.
empty()) {
1672 UpstreamCursor &Cur = CursorStack.
back();
1673 if (Cur.RefI == Cur.RefE) {
1674 // Copy the node data into the primary store.
1675 // FIXME: Use hard-link or cloning if the file-system supports it and data
1676 // is stored into a separate file.
1678 // The bottom of \p PrimaryNodesStack contains the primary ID for the
1679 // current node plus the list of imported referenced IDs.
1680 assert(PrimaryNodesStack.
size() >= Cur.RefsCount + 1);
1681 ObjectID PrimaryID = *(PrimaryNodesStack.
end() - Cur.RefsCount - 1);
1682 auto PrimaryRefs =
ArrayRef(PrimaryNodesStack)
1683 .slice(PrimaryNodesStack.
size() - Cur.RefsCount);
1687 // Remove the current node and its IDs from the stack.
1688 PrimaryNodesStack.
truncate(PrimaryNodesStack.
size() - Cur.RefsCount);
1693 ObjectID UpstreamID = *(Cur.RefI++);
1694 auto PrimaryID =
getReference(UpstreamDB->getDigest(UpstreamID));
1696 return PrimaryID.takeError();
1698 // This \p ObjectID already exists in the primary. Either it was imported
1699 // via \p importFullTree or the client created it, in which case the
1700 // client takes responsibility for how it was formed.
1701 enqueueNode(*PrimaryID, std::nullopt);
1704 Expected<std::optional<ObjectHandle>> UpstreamNode =
1705 UpstreamDB->load(UpstreamID);
1708 enqueueNode(*PrimaryID, *UpstreamNode);
1718 // Copies only a single node, it doesn't copy the referenced nodes.
1720 // Copy the node data into the primary store.
1721 // FIXME: Use hard-link or cloning if the file-system supports it and data is
1722 // stored into a separate file.
1723 auto Data = UpstreamDB->getObjectData(UpstreamNode);
1724 auto UpstreamRefs = UpstreamDB->getObjectRefs(UpstreamNode);
1726 Refs.
reserve(std::distance(UpstreamRefs.begin(), UpstreamRefs.end()));
1727 for (ObjectID UpstreamRef : UpstreamRefs) {
1730 return Ref.takeError();
1737Expected<std::optional<ObjectHandle>>
1738OnDiskGraphDB::faultInFromUpstream(
ObjectID PrimaryID) {
1740 return std::nullopt;
1742 auto UpstreamID = UpstreamDB->getReference(
getDigest(PrimaryID));
1744 return UpstreamID.takeError();
1746 Expected<std::optional<ObjectHandle>> UpstreamNode =
1747 UpstreamDB->load(*UpstreamID);
1751 return std::nullopt;
1754 ? importSingleNode(PrimaryID, **UpstreamNode)
1755 : importFullTree(PrimaryID, **UpstreamNode))
1756 return std::move(
E);
1757 return load(PrimaryID);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Mark last scratch load
AMDGPU Prepare AGPR Alloc
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_UNLIKELY(EXPR)
This file defines the DenseMap class.
static cl::opt< int > PageSize("imp-null-check-page-size", cl::desc("The page size of the target in bytes"), cl::init(4096), cl::Hidden)
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
This file declares interface for OnDiskDataAllocator, a file backed data pool can be used to allocate...
static constexpr StringLiteral FilePrefixLeaf0
static constexpr StringLiteral DataPoolTableName
static constexpr StringLiteral FilePrefixObject
static constexpr StringLiteral FilePrefixLeaf
static constexpr StringLiteral IndexFilePrefix
static OnDiskContent getContentFromHandle(const OnDiskDataAllocator &DataPool, ObjectHandle OH)
static constexpr StringLiteral DataPoolFilePrefix
static Error createCorruptObjectError(Expected< ArrayRef< uint8_t > > ID)
static size_t getPageSize()
static Expected< MappedTempFile > createTempFile(StringRef FinalPath, uint64_t Size)
static constexpr StringLiteral IndexTableName
This declares OnDiskGraphDB, an ondisk CAS database with a fixed length hash.
This file declares interface for OnDiskTrieRawHashMap, a thread-safe and (mostly) lock-free hash map ...
Provides a library for accessing information about this process and other processes on the operating ...
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
ArrayRef< T > drop_back(size_t N=1) const
Drop the last N elements of the array.
bool empty() const
empty - Check if the array is empty.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void truncate(size_type N)
Like resize, but requires that N is less than size().
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
StringRef - Represent a constant reference to a string, i.e.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
FileOffset is a wrapper around uint64_t to represent the offset of data from the beginning of the fil...
Handle to a loaded object in a ObjectStore instance.
Expected< ArrayRef< char > > get(FileOffset Offset, size_t Size) const
Get the data of Size stored at the given Offset.
static Expected< OnDiskDataAllocator > create(const Twine &Path, const Twine &TableName, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, uint32_t UserHeaderSize=0, function_ref< void(void *)> UserHeaderInit=nullptr)
OnDiskTrieRawHashMap is a persistent trie data structure used as hash maps.
static Expected< OnDiskTrieRawHashMap > create(const Twine &Path, const Twine &TrieName, size_t NumHashBits, uint64_t DataSize, uint64_t MaxFileSize, std::optional< uint64_t > NewFileInitialSize, std::optional< size_t > NewTableNumRootBits=std::nullopt, std::optional< size_t > NewTableNumSubtrieBits=std::nullopt)
Gets or creates a file at Path with a hash-mapped trie named TrieName.
static std::optional< InternalRef4B > tryToShrink(InternalRef Ref)
Shrink to 4B reference.
Array of internal node references.
Standard 8 byte reference inside OnDiskGraphDB.
static InternalRef getFromOffset(FileOffset Offset)
Handle for a loaded node object.
static ObjectHandle fromFileOffset(FileOffset Offset)
static ObjectHandle fromMemory(uintptr_t Ptr)
ObjectHandle(uint64_t Opaque)
On-disk CAS nodes database, independent of a particular hashing algorithm.
FaultInPolicy
How to fault-in nodes if an upstream database is used.
@ SingleNode
Copy only the requested node.
void print(raw_ostream &OS) const
Expected< std::optional< ObjectHandle > > load(ObjectID Ref)
Expected< bool > isMaterialized(ObjectID Ref)
Check whether the object associated with Ref is stored in the CAS.
Error validate(bool Deep, HashingFuncT Hasher) const
Validate the OnDiskGraphDB.
bool containsObject(ObjectID Ref) const
Check whether the object associated with Ref is stored in the CAS.
object_refs_range getObjectRefs(ObjectHandle Node) const
unsigned getHardStorageLimitUtilization() const
Error store(ObjectID ID, ArrayRef< ObjectID > Refs, ArrayRef< char > Data)
Associate data & references with a particular object ID.
ArrayRef< uint8_t > getDigest(ObjectID Ref) const
static Expected< std::unique_ptr< OnDiskGraphDB > > open(StringRef Path, StringRef HashName, unsigned HashByteSize, OnDiskGraphDB *UpstreamDB=nullptr, FaultInPolicy Policy=FaultInPolicy::FullTree)
Open the on-disk store from a directory.
std::optional< ObjectID > getExistingReference(ArrayRef< uint8_t > Digest)
Get an existing reference to the object Digest.
size_t getStorageSize() const
Expected< ObjectID > getReference(ArrayRef< uint8_t > Hash)
Form a reference for the provided hash.
function_ref< void( ArrayRef< ArrayRef< uint8_t > >, ArrayRef< char >, SmallVectorImpl< uint8_t > &)> HashingFuncT
Hashing function type for validation.
ArrayRef< char > getObjectData(ObjectHandle Node) const
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
static unsigned getPageSizeEstimate()
Get the process's estimated page size.
LLVM_ABI Error keep(const Twine &Name)
static LLVM_ABI Expected< TempFile > create(const Twine &Model, unsigned Mode=all_read|all_write, OpenFlags ExtraFlags=OF_None)
This creates a temporary file with createUniqueFile and schedules it for deletion with sys::RemoveFil...
Represents the result of a call to sys::fs::status().
This class represents a memory mapped file.
LLVM_ABI size_t size() const
@ readonly
May only access map via const_data as read only.
@ readwrite
May access map via data and modify it. Written to path.
LLVM_ABI char * data() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
constexpr StringLiteral CASFormatVersion
The version for all the ondisk database files.
Expected< std::optional< uint64_t > > getOverriddenMaxMappingSize()
Retrieves an overridden maximum mapping size for CAS files, if any, speicified by LLVM_CAS_MAX_MAPPIN...
Expected< size_t > preallocateFileTail(int FD, size_t CurrentSize, size_t NewSize)
Allocate space for the file FD on disk, if the filesystem supports it.
bool useSmallMappingSize(const Twine &Path)
Whether to use a small file mapping for ondisk databases created in Path.
uint64_t getDataSize(const FuncRecordTy *Record)
Return the coverage map data size for the function.
uint64_t read64le(const void *P)
void write64le(void *P, uint64_t V)
void write32le(void *P, uint32_t V)
uint32_t read32le(const void *P)
LLVM_ABI std::error_code closeFile(file_t &F)
Close the file object.
LLVM_ABI std::error_code rename(const Twine &from, const Twine &to)
Rename from to to.
std::error_code resize_file_before_mapping_readwrite(int FD, uint64_t Size)
Resize FD to Size before mapping mapped_file_region::readwrite.
LLVM_ABI bool exists(const basic_file_status &status)
Does file exist?
LLVM_ABI std::error_code createUniqueFile(const Twine &Model, int &ResultFD, SmallVectorImpl< char > &ResultPath, OpenFlags Flags=OF_None, unsigned Mode=all_read|all_write)
Create a uniquely named file.
LLVM_ABI std::error_code remove(const Twine &path, bool IgnoreNonExisting=true)
Remove path.
LLVM_ABI Expected< file_t > openNativeFileForRead(const Twine &Name, OpenFlags Flags=OF_None, SmallVectorImpl< char > *RealPath=nullptr)
Opens the file with the given name in a read-only mode, returning its open file descriptor.
LLVM_ABI std::error_code create_directories(const Twine &path, bool IgnoreExisting=true, perms Perms=owner_all|group_all)
Create all the non-existent directories in path.
LLVM_ABI file_t convertFDToNativeFile(int FD)
Converts from a Posix file descriptor number to a native file handle.
LLVM_ABI std::error_code status(const Twine &path, file_status &result, bool follow=true)
Get file status as if by POSIX stat().
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
This is an optimization pass for GlobalISel generic memory operations.
Error createFileError(const Twine &F, Error E)
Concatenate a source file path and/or name with an Error.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
detail::scope_exit< std::decay_t< Callable > > make_scope_exit(Callable &&F)
ArrayRef< CharT > arrayRefFromStringRef(StringRef Input)
Construct a string ref from an array ref of unsigned chars.
std::error_code make_error_code(BitcodeError E)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Error handleErrors(Error E, HandlerTs &&... Hs)
Pass the ErrorInfo(s) contained in E to their respective handlers.
FunctionAddr VTableAddr uintptr_t uintptr_t DataSize
std::string utohexstr(uint64_t X, bool LowerCase=false, unsigned Width=0)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Error createStringError(std::error_code EC, char const *Fmt, const Ts &... Vals)
Create formatted StringError object.
std::optional< T > expectedToOptional(Expected< T > &&E)
Convert an Expected to an Optional without doing anything.
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
FunctionAddr VTableAddr Next
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
ArrayRef(const T &OneElt) -> ArrayRef< T >
std::string toString(const APInt &I, unsigned Radix, bool Signed, bool formatAsCLiteral=false, bool UpperCase=true, bool InsertSeparators=false)
OutputIt copy(R &&Range, OutputIt Out)
void toHex(ArrayRef< uint8_t > Input, bool LowerCase, SmallVectorImpl< char > &Output)
Convert buffer Input to its hexadecimal representation. The returned string is double the size of Inp...
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI Error errorCodeToError(std::error_code EC)
Helper for converting an std::error_code to a Error.
void consumeError(Error Err)
Consume a Error without doing anything.
bool isAddrAligned(Align Lhs, const void *Addr)
Checks that Addr is a multiple of the alignment.
Implement std::hash so that hash_code can be used in STL containers.
Proxy for an on-disk index record.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static constexpr Align Of()
Allow constructions of constexpr Align from types.
Const value proxy to access the records stored in TrieRawHashMap.
Value proxy to access the records stored in TrieRawHashMap.
MutableArrayRef< char > Data