Merge "libsnapshot: Use std::unordered_set in DmSnapCowSizeCalculator."
diff --git a/fs_mgr/libsnapshot/dm_snapshot_internals.h b/fs_mgr/libsnapshot/dm_snapshot_internals.h
index ed77c15..4a36251 100644
--- a/fs_mgr/libsnapshot/dm_snapshot_internals.h
+++ b/fs_mgr/libsnapshot/dm_snapshot_internals.h
@@ -17,8 +17,9 @@
#include <android-base/logging.h>
#include <stdint.h>
+#include <limits>
#include <optional>
-#include <vector>
+#include <unordered_set>
namespace android {
namespace snapshot {
@@ -37,21 +38,16 @@
return;
}
- if (modified_chunks_.size() <= chunk_id) {
- if (modified_chunks_.max_size() <= chunk_id) {
- LOG(ERROR) << "Invalid COW size, chunk_id is too large.";
- valid_ = false;
- return;
- }
- modified_chunks_.resize(chunk_id + 1, false);
- if (modified_chunks_.size() <= chunk_id) {
- LOG(ERROR) << "Invalid COW size, chunk_id is too large.";
- valid_ = false;
- return;
- }
+ if (chunk_id > std::numeric_limits<uint32_t>::max()) {
+ LOG(ERROR) << "Chunk exceeds maximum size: " << chunk_id;
+ valid_ = false;
+ return;
+ }
+ if (modified_chunks_.count(chunk_id) > 0) {
+ return;
}
- modified_chunks_[chunk_id] = true;
+ modified_chunks_.emplace(chunk_id);
}
std::optional<uint64_t> cow_size_bytes() const {
@@ -91,23 +87,16 @@
return std::nullopt;
}
- uint64_t modified_chunks_count = 0;
uint64_t cow_chunks = 0;
- for (const auto& c : modified_chunks_) {
- if (c) {
- ++modified_chunks_count;
- }
- }
-
/* disk header + padding = 1 chunk */
cow_chunks += 1;
/* snapshot modified chunks */
- cow_chunks += modified_chunks_count;
+ cow_chunks += modified_chunks_.size();
/* snapshot chunks index metadata */
- cow_chunks += 1 + modified_chunks_count / exceptions_per_chunk;
+ cow_chunks += 1 + modified_chunks_.size() / exceptions_per_chunk;
return cow_chunks;
}
@@ -150,30 +139,8 @@
/*
* |modified_chunks_| is a container that keeps trace of the modified
* chunks.
- * Multiple options were considered when choosing the most appropriate data
- * structure for this container. Here follows a summary of why vector<bool>
- * has been chosen, taking as a reference a snapshot partition of 4 GiB and
- * chunk size of 4 KiB.
- * - std::set<uint64_t> is very space-efficient for a small number of
- * operations, but if the whole snapshot is changed, it would need to
- * store
- * 4 GiB / 4 KiB * (64 bit / 8) = 8 MiB
- * just for the data, plus the additional data overhead for the red-black
- * tree used for data sorting (if each rb-tree element stores 3 address
- * and the word-aligne color, the total size grows to 32 MiB).
- * - std::bitset<N> is not a good fit because requires a priori knowledge,
- * at compile time, of the bitset size.
- * - std::vector<bool> is a special case of vector, which uses a data
- * compression that allows reducing the space utilization of each element
- * to 1 bit. In detail, this data structure is composed of a resizable
- * array of words, each of them representing a bitmap. On a 64 bit
- * device, modifying the whole 4 GiB snapshot grows this container up to
- * 4 * GiB / 4 KiB / 64 = 64 KiB
- * that, even if is the same space requirement to change a single byte at
- * the highest address of the snapshot, is a very affordable space
- * requirement.
*/
- std::vector<bool> modified_chunks_;
+ std::unordered_set<uint32_t> modified_chunks_;
};
} // namespace snapshot