adlr@google.com | 3defe6a | 2009-12-04 20:57:17 +0000 | [diff] [blame] | 1 | // Copyright (c) 2009 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "update_engine/delta_diff_generator.h" |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 6 | #include <sys/stat.h> |
| 7 | #include <sys/types.h> |
| 8 | #include <errno.h> |
| 9 | #include <fcntl.h> |
| 10 | #include <algorithm> |
| 11 | #include <set> |
| 12 | #include <string> |
| 13 | #include <utility> |
| 14 | #include <vector> |
| 15 | #include <bzlib.h> |
| 16 | #include "chromeos/obsolete_logging.h" |
| 17 | #include "update_engine/bzip.h" |
| 18 | #include "update_engine/cycle_breaker.h" |
| 19 | #include "update_engine/extent_mapper.h" |
| 20 | #include "update_engine/file_writer.h" |
| 21 | #include "update_engine/filesystem_iterator.h" |
| 22 | #include "update_engine/graph_types.h" |
| 23 | #include "update_engine/graph_utils.h" |
| 24 | #include "update_engine/subprocess.h" |
| 25 | #include "update_engine/topological_sort.h" |
| 26 | #include "update_engine/update_metadata.pb.h" |
| 27 | #include "update_engine/utils.h" |
| 28 | |
| 29 | using std::make_pair; |
| 30 | using std::min; |
| 31 | using std::set; |
| 32 | using std::string; |
| 33 | using std::vector; |
| 34 | |
| 35 | namespace chromeos_update_engine { |
| 36 | |
| 37 | typedef DeltaDiffGenerator::Block Block; |
| 38 | |
| 39 | namespace { |
| 40 | const size_t kBlockSize = 4096; |
| 41 | const char* const kBsdiffPath = "/usr/bin/bsdiff"; |
| 42 | const uint64 kVersionNumber = 1; |
| 43 | const char* const kDeltaMagic = "CrAU"; |
| 44 | |
| 45 | // Stores all Extents for a file into 'out'. Returns true on success. |
| 46 | bool GatherExtents(const string& path, |
| 47 | google::protobuf::RepeatedPtrField<Extent>* out) { |
| 48 | vector<Extent> extents; |
| 49 | TEST_AND_RETURN_FALSE(extent_mapper::ExtentsForFileFibmap(path, &extents)); |
| 50 | DeltaDiffGenerator::StoreExtents(extents, out); |
| 51 | return true; |
| 52 | } |
| 53 | |
| 54 | // Runs the bsdiff tool on two files and returns the resulting delta in |
| 55 | // 'out'. Returns true on success. |
| 56 | bool BsdiffFiles(const string& old_file, |
| 57 | const string& new_file, |
| 58 | vector<char>* out) { |
| 59 | const string kPatchFile = "/tmp/delta.patchXXXXXX"; |
| 60 | string patch_file_path; |
| 61 | |
| 62 | TEST_AND_RETURN_FALSE( |
| 63 | utils::MakeTempFile(kPatchFile, &patch_file_path, NULL)); |
| 64 | |
| 65 | vector<string> cmd; |
| 66 | cmd.push_back(kBsdiffPath); |
| 67 | cmd.push_back(old_file); |
| 68 | cmd.push_back(new_file); |
| 69 | cmd.push_back(patch_file_path); |
| 70 | |
| 71 | int rc = 1; |
| 72 | vector<char> patch_file; |
| 73 | TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc)); |
| 74 | TEST_AND_RETURN_FALSE(rc == 0); |
| 75 | TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out)); |
| 76 | unlink(patch_file_path.c_str()); |
| 77 | return true; |
| 78 | } |
| 79 | |
| 80 | // The blocks vector contains a reader and writer for each block on the |
| 81 | // filesystem that's being in-place updated. We populate the reader/writer |
| 82 | // fields of blocks by calling this function. |
| 83 | // For each block in 'operation' that is read or written, find that block |
| 84 | // in 'blocks' and set the reader/writer field to the vertex passed. |
| 85 | // 'graph' is not strictly necessary, but useful for printing out |
| 86 | // error messages. |
| 87 | bool AddInstallOpToBlocksVector( |
| 88 | const DeltaArchiveManifest_InstallOperation& operation, |
| 89 | vector<Block>* blocks, |
| 90 | const Graph& graph, |
| 91 | Vertex::Index vertex) { |
| 92 | LOG(INFO) << "AddInstallOpToBlocksVector(" << vertex << "), " |
| 93 | << graph[vertex].file_name; |
| 94 | // See if this is already present. |
| 95 | TEST_AND_RETURN_FALSE(operation.dst_extents_size() > 0); |
| 96 | |
| 97 | enum BlockField { READER = 0, WRITER, BLOCK_FIELD_COUNT }; |
| 98 | for (int field = READER; field < BLOCK_FIELD_COUNT; field++) { |
| 99 | const int extents_size = |
| 100 | (field == READER) ? operation.src_extents_size() : |
| 101 | operation.dst_extents_size(); |
| 102 | const char* past_participle = (field == READER) ? "read" : "written"; |
| 103 | const google::protobuf::RepeatedPtrField<Extent>& extents = |
| 104 | (field == READER) ? operation.src_extents() : operation.dst_extents(); |
| 105 | Vertex::Index Block::*access_type = |
| 106 | (field == READER) ? &Block::reader : &Block::writer; |
| 107 | |
| 108 | for (int i = 0; i < extents_size; i++) { |
| 109 | const Extent& extent = extents.Get(i); |
| 110 | if (extent.start_block() == kSparseHole) { |
| 111 | // Hole in sparse file. skip |
| 112 | continue; |
| 113 | } |
| 114 | for (uint64_t block = extent.start_block(); |
| 115 | block < (extent.start_block() + extent.num_blocks()); block++) { |
| 116 | LOG(INFO) << "ext: " << i << " block: " << block; |
| 117 | if ((*blocks)[block].*access_type != Vertex::kInvalidIndex) { |
| 118 | LOG(FATAL) << "Block " << block << " is already " |
| 119 | << past_participle << " by " |
| 120 | << (*blocks)[block].*access_type << "(" |
| 121 | << graph[(*blocks)[block].*access_type].file_name |
| 122 | << ") and also " << vertex << "(" |
| 123 | << graph[vertex].file_name << ")"; |
| 124 | } |
| 125 | (*blocks)[block].*access_type = vertex; |
| 126 | } |
| 127 | } |
| 128 | } |
| 129 | return true; |
| 130 | } |
| 131 | |
| 132 | // For a given regular file which must exist at new_root + path, and may |
| 133 | // exist at old_root + path, creates a new InstallOperation and adds it to |
| 134 | // the graph. Also, populates the 'blocks' array as necessary. |
| 135 | // Also, writes the data necessary to send the file down to the client |
| 136 | // into data_fd, which has length *data_file_size. *data_file_size is |
| 137 | // updated appropriately. |
| 138 | // Returns true on success. |
| 139 | bool DeltaReadFile(Graph* graph, |
| 140 | vector<Block>* blocks, |
| 141 | const string& old_root, |
| 142 | const string& new_root, |
| 143 | const string& path, // within new_root |
| 144 | int data_fd, |
| 145 | off_t* data_file_size) { |
| 146 | vector<char> data; |
| 147 | DeltaArchiveManifest_InstallOperation operation; |
| 148 | |
| 149 | TEST_AND_RETURN_FALSE(DeltaDiffGenerator::ReadFileToDiff(old_root + path, |
| 150 | new_root + path, |
| 151 | &data, |
| 152 | &operation)); |
| 153 | |
| 154 | // Write the data |
| 155 | if (operation.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE) { |
| 156 | operation.set_data_offset(*data_file_size); |
| 157 | operation.set_data_length(data.size()); |
| 158 | } |
| 159 | |
| 160 | TEST_AND_RETURN_FALSE(utils::WriteAll(data_fd, &data[0], data.size())); |
| 161 | *data_file_size += data.size(); |
| 162 | |
| 163 | // Now, insert into graph and blocks vector |
| 164 | graph->resize(graph->size() + 1); |
| 165 | graph->back().op = operation; |
| 166 | CHECK(graph->back().op.has_type()); |
| 167 | graph->back().file_name = path; |
| 168 | |
| 169 | TEST_AND_RETURN_FALSE(AddInstallOpToBlocksVector(graph->back().op, |
| 170 | blocks, |
| 171 | *graph, |
| 172 | graph->size() - 1)); |
| 173 | return true; |
| 174 | } |
| 175 | |
| 176 | // For each regular file within new_root, creates a node in the graph, |
| 177 | // determines the best way to compress it (REPLACE, REPLACE_BZ, COPY, BSDIFF), |
| 178 | // and writes any necessary data to the end of data_fd. |
| 179 | bool DeltaReadFiles(Graph* graph, |
| 180 | vector<Block>* blocks, |
| 181 | const string& old_root, |
| 182 | const string& new_root, |
| 183 | int data_fd, |
| 184 | off_t* data_file_size) { |
| 185 | set<ino_t> visited_inodes; |
| 186 | for (FilesystemIterator fs_iter(new_root, |
| 187 | utils::SetWithValue<string>("/lost+found")); |
| 188 | !fs_iter.IsEnd(); fs_iter.Increment()) { |
| 189 | if (!S_ISREG(fs_iter.GetStat().st_mode)) |
| 190 | continue; |
| 191 | |
| 192 | // Make sure we visit each inode only once. |
| 193 | if (utils::SetContainsKey(visited_inodes, fs_iter.GetStat().st_ino)) |
| 194 | continue; |
| 195 | visited_inodes.insert(fs_iter.GetStat().st_ino); |
| 196 | if (fs_iter.GetStat().st_size == 0) |
| 197 | continue; |
| 198 | |
| 199 | LOG(INFO) << "Encoding file " << fs_iter.GetPartialPath(); |
| 200 | |
| 201 | TEST_AND_RETURN_FALSE(DeltaReadFile(graph, |
| 202 | blocks, |
| 203 | old_root, |
| 204 | new_root, |
| 205 | fs_iter.GetPartialPath(), |
| 206 | data_fd, |
| 207 | data_file_size)); |
| 208 | } |
| 209 | return true; |
| 210 | } |
| 211 | |
| 212 | // Attempts to find block_count blocks to use as scratch space. |
| 213 | // Returns true on success. |
| 214 | // Right now we return exactly as many blocks as are required. |
| 215 | // TODO(adlr): consider returning all scratch blocks, |
| 216 | // even if there are extras, to make it easier for a scratch allocator |
| 217 | // to find contiguous regions for specific scratch writes. |
| 218 | bool FindScratchSpace(const vector<Block>& blocks, |
| 219 | vector<Block>::size_type block_count, |
| 220 | vector<Extent>* out) { |
| 221 | // Scan blocks for blocks that are neither read nor written. |
| 222 | // If we don't find enough of those, return false. |
| 223 | // TODO(adlr): return blocks that are written by |
| 224 | // operations that don't have incoming edges (and thus, can be |
| 225 | // deferred until all old blocks are read by other operations). |
| 226 | vector<Extent> ret; |
| 227 | vector<Block>::size_type blocks_found = 0; |
| 228 | for (vector<Block>::size_type i = 0; |
| 229 | i < blocks.size() && blocks_found < block_count; i++) { |
| 230 | if (blocks[i].reader == Vertex::kInvalidIndex && |
| 231 | blocks[i].writer == Vertex::kInvalidIndex) { |
| 232 | graph_utils::AppendBlockToExtents(&ret, i); |
| 233 | blocks_found++; |
| 234 | } |
| 235 | } |
| 236 | if (blocks_found == block_count) { |
| 237 | LOG(INFO) << "returning " << blocks_found << " scratch blocks"; |
| 238 | out->swap(ret); |
| 239 | return true; |
| 240 | } |
| 241 | return false; |
| 242 | } |
| 243 | |
| 244 | // This class takes a collection of Extents and allows the client to |
| 245 | // allocate space from these extents. The client must not request more |
| 246 | // space then exists in the source extents. Space is allocated from the |
| 247 | // beginning of the source extents on; no consideration is paid to |
| 248 | // fragmentation. |
| 249 | class LinearExtentAllocator { |
| 250 | public: |
| 251 | explicit LinearExtentAllocator(const vector<Extent>& extents) |
| 252 | : extents_(extents), |
| 253 | extent_index_(0), |
| 254 | extent_blocks_allocated_(0) {} |
| 255 | vector<Extent> Allocate(const uint64_t block_count) { |
| 256 | vector<Extent> ret; |
| 257 | for (uint64_t blocks = 0; blocks < block_count; blocks++) { |
| 258 | CHECK_LT(extent_index_, extents_.size()); |
| 259 | CHECK_LT(extent_blocks_allocated_, extents_[extent_index_].num_blocks()); |
| 260 | graph_utils::AppendBlockToExtents( |
| 261 | &ret, |
| 262 | extents_[extent_index_].start_block() + extent_blocks_allocated_); |
| 263 | extent_blocks_allocated_++; |
| 264 | if (extent_blocks_allocated_ >= extents_[extent_index_].num_blocks()) { |
| 265 | extent_blocks_allocated_ = 0; |
| 266 | extent_index_++; |
| 267 | } |
| 268 | } |
| 269 | return ret; |
| 270 | } |
| 271 | private: |
| 272 | const vector<Extent> extents_; |
| 273 | vector<Extent>::size_type extent_index_; // current Extent |
| 274 | // number of blocks allocated from the current extent |
| 275 | uint64_t extent_blocks_allocated_; |
| 276 | }; |
| 277 | |
| 278 | // Reads blocks from image_path that are not yet marked as being written |
| 279 | // in the blocks array. These blocks that remain are non-file-data blocks. |
| 280 | // In the future we might consider intelligent diffing between this data |
| 281 | // and data in the previous image, but for now we just bzip2 compress it |
| 282 | // and include it in the update. |
| 283 | // Creates a new node in the graph to write these blocks and writes the |
| 284 | // appropriate blob to blobs_fd. Reads and updates blobs_length; |
| 285 | bool ReadUnwrittenBlocks(const vector<Block>& blocks, |
| 286 | int blobs_fd, |
| 287 | off_t* blobs_length, |
| 288 | const string& image_path, |
| 289 | DeltaArchiveManifest_InstallOperation* out_op) { |
| 290 | int image_fd = open(image_path.c_str(), O_RDONLY, 000); |
| 291 | TEST_AND_RETURN_FALSE_ERRNO(image_fd >= 0); |
| 292 | ScopedFdCloser image_fd_closer(&image_fd); |
| 293 | |
| 294 | string temp_file_path; |
| 295 | TEST_AND_RETURN_FALSE(utils::MakeTempFile("/tmp/CrAU_temp_data.XXXXXX", |
| 296 | &temp_file_path, |
| 297 | NULL)); |
| 298 | |
| 299 | FILE* file = fopen(temp_file_path.c_str(), "w"); |
| 300 | TEST_AND_RETURN_FALSE(file); |
| 301 | int err = BZ_OK; |
| 302 | |
| 303 | BZFILE* bz_file = BZ2_bzWriteOpen(&err, |
| 304 | file, |
| 305 | 9, // max compression |
| 306 | 0, // verbosity |
| 307 | 0); // default work factor |
| 308 | TEST_AND_RETURN_FALSE(err == BZ_OK); |
| 309 | |
| 310 | vector<Extent> extents; |
| 311 | vector<Block>::size_type block_count = 0; |
| 312 | |
| 313 | LOG(INFO) << "Appending left over blocks to extents"; |
| 314 | for (vector<Block>::size_type i = 0; i < blocks.size(); i++) { |
| 315 | if (blocks[i].writer != Vertex::kInvalidIndex) |
| 316 | continue; |
| 317 | graph_utils::AppendBlockToExtents(&extents, i); |
| 318 | block_count++; |
| 319 | } |
| 320 | |
| 321 | // Code will handle 'buf' at any size that's a multiple of kBlockSize, |
| 322 | // so we arbitrarily set it to 1024 * kBlockSize. |
| 323 | vector<char> buf(1024 * kBlockSize); |
| 324 | |
| 325 | LOG(INFO) << "Reading left over blocks"; |
| 326 | vector<Block>::size_type blocks_copied_count = 0; |
| 327 | |
| 328 | // For each extent in extents, write the data into BZ2_bzWrite which |
| 329 | // sends it to an output file. |
| 330 | // We use the temporary buffer 'buf' to hold the data, which may be |
| 331 | // smaller than the extent, so in that case we have to loop to get |
| 332 | // the extent's data (that's the inner while loop). |
| 333 | for (vector<Extent>::const_iterator it = extents.begin(); |
| 334 | it != extents.end(); ++it) { |
| 335 | vector<Block>::size_type blocks_read = 0; |
| 336 | while (blocks_read < it->num_blocks()) { |
| 337 | const int copy_block_cnt = |
| 338 | min(buf.size() / kBlockSize, |
| 339 | static_cast<vector<char>::size_type>( |
| 340 | it->num_blocks() - blocks_read)); |
| 341 | ssize_t rc = pread(image_fd, |
| 342 | &buf[0], |
| 343 | copy_block_cnt * kBlockSize, |
| 344 | (it->start_block() + blocks_read) * kBlockSize); |
| 345 | TEST_AND_RETURN_FALSE_ERRNO(rc >= 0); |
| 346 | TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) == |
| 347 | copy_block_cnt * kBlockSize); |
| 348 | BZ2_bzWrite(&err, bz_file, &buf[0], copy_block_cnt * kBlockSize); |
| 349 | TEST_AND_RETURN_FALSE(err == BZ_OK); |
| 350 | blocks_read += copy_block_cnt; |
| 351 | blocks_copied_count += copy_block_cnt; |
| 352 | LOG(INFO) << "progress: " << ((float)blocks_copied_count)/block_count; |
| 353 | } |
| 354 | } |
| 355 | BZ2_bzWriteClose(&err, bz_file, 0, NULL, NULL); |
| 356 | TEST_AND_RETURN_FALSE(err == BZ_OK); |
| 357 | bz_file = NULL; |
| 358 | TEST_AND_RETURN_FALSE_ERRNO(0 == fclose(file)); |
| 359 | file = NULL; |
| 360 | |
| 361 | vector<char> compressed_data; |
| 362 | LOG(INFO) << "Reading compressed data off disk"; |
| 363 | TEST_AND_RETURN_FALSE(utils::ReadFile(temp_file_path, &compressed_data)); |
| 364 | TEST_AND_RETURN_FALSE(unlink(temp_file_path.c_str()) == 0); |
| 365 | |
| 366 | // Add node to graph to write these blocks |
| 367 | out_op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ); |
| 368 | out_op->set_data_offset(*blobs_length); |
| 369 | out_op->set_data_length(compressed_data.size()); |
| 370 | *blobs_length += compressed_data.size(); |
| 371 | out_op->set_dst_length(kBlockSize * block_count); |
| 372 | DeltaDiffGenerator::StoreExtents(extents, out_op->mutable_dst_extents()); |
| 373 | |
| 374 | TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd, |
| 375 | &compressed_data[0], |
| 376 | compressed_data.size())); |
| 377 | LOG(INFO) << "done with extra blocks"; |
| 378 | return true; |
| 379 | } |
| 380 | |
| 381 | // Writes the uint64 passed in in host-endian to the file as big-endian. |
| 382 | // Returns true on success. |
| 383 | bool WriteUint64AsBigEndian(FileWriter* writer, const uint64 value) { |
| 384 | uint64 value_be = htobe64(value); |
| 385 | TEST_AND_RETURN_FALSE(writer->Write(&value_be, sizeof(value_be)) == |
| 386 | sizeof(value_be)); |
| 387 | return true; |
| 388 | } |
| 389 | |
| 390 | // Adds each operation from the graph to the manifest in the order |
| 391 | // specified by 'order'. |
| 392 | void InstallOperationsToManifest( |
| 393 | const Graph& graph, |
| 394 | const vector<Vertex::Index>& order, |
| 395 | DeltaArchiveManifest* out_manifest) { |
| 396 | for (vector<Vertex::Index>::const_iterator it = order.begin(); |
| 397 | it != order.end(); ++it) { |
| 398 | DeltaArchiveManifest_InstallOperation* op = |
| 399 | out_manifest->add_install_operations(); |
| 400 | *op = graph[*it].op; |
| 401 | } |
| 402 | } |
| 403 | |
| 404 | void CheckGraph(const Graph& graph) { |
| 405 | for (Graph::const_iterator it = graph.begin(); it != graph.end(); ++it) { |
| 406 | CHECK(it->op.has_type()); |
| 407 | } |
| 408 | } |
| 409 | |
| 410 | } // namespace {} |
| 411 | |
| 412 | bool DeltaDiffGenerator::ReadFileToDiff( |
| 413 | const string& old_filename, |
| 414 | const string& new_filename, |
| 415 | vector<char>* out_data, |
| 416 | DeltaArchiveManifest_InstallOperation* out_op) { |
| 417 | // Read new data in |
| 418 | vector<char> new_data; |
| 419 | TEST_AND_RETURN_FALSE(utils::ReadFile(new_filename, &new_data)); |
| 420 | |
| 421 | TEST_AND_RETURN_FALSE(!new_data.empty()); |
| 422 | |
| 423 | vector<char> new_data_bz; |
| 424 | TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz)); |
| 425 | CHECK(!new_data_bz.empty()); |
| 426 | |
| 427 | vector<char> data; // Data blob that will be written to delta file. |
| 428 | |
| 429 | DeltaArchiveManifest_InstallOperation operation; |
| 430 | size_t current_best_size = 0; |
| 431 | if (new_data.size() <= new_data_bz.size()) { |
| 432 | operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE); |
| 433 | current_best_size = new_data.size(); |
| 434 | data = new_data; |
| 435 | } else { |
| 436 | operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ); |
| 437 | current_best_size = new_data_bz.size(); |
| 438 | data = new_data_bz; |
| 439 | } |
| 440 | |
| 441 | // Do we have an original file to consider? |
| 442 | struct stat old_stbuf; |
| 443 | if (0 != stat(old_filename.c_str(), &old_stbuf)) { |
| 444 | // If stat-ing the old file fails, it should be because it doesn't exist. |
| 445 | TEST_AND_RETURN_FALSE(errno == ENOTDIR || errno == ENOENT); |
| 446 | } else { |
| 447 | // Read old data |
| 448 | vector<char> old_data; |
| 449 | TEST_AND_RETURN_FALSE(utils::ReadFile(old_filename, &old_data)); |
| 450 | if (old_data == new_data) { |
| 451 | // No change in data. |
| 452 | operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE); |
| 453 | current_best_size = 0; |
| 454 | data.clear(); |
| 455 | } else { |
| 456 | // Try bsdiff of old to new data |
| 457 | vector<char> bsdiff_delta; |
| 458 | TEST_AND_RETURN_FALSE( |
| 459 | BsdiffFiles(old_filename, new_filename, &bsdiff_delta)); |
| 460 | CHECK_GT(bsdiff_delta.size(), 0); |
| 461 | if (bsdiff_delta.size() < current_best_size) { |
| 462 | operation.set_type(DeltaArchiveManifest_InstallOperation_Type_BSDIFF); |
| 463 | current_best_size = bsdiff_delta.size(); |
| 464 | |
| 465 | data = bsdiff_delta; |
| 466 | } |
| 467 | } |
| 468 | } |
| 469 | |
| 470 | // Set parameters of the operations |
| 471 | CHECK_EQ(data.size(), current_best_size); |
| 472 | |
| 473 | if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE || |
| 474 | operation.type() == DeltaArchiveManifest_InstallOperation_Type_BSDIFF) { |
| 475 | TEST_AND_RETURN_FALSE( |
| 476 | GatherExtents(old_filename, operation.mutable_src_extents())); |
| 477 | operation.set_src_length(old_stbuf.st_size); |
| 478 | } |
| 479 | |
| 480 | TEST_AND_RETURN_FALSE( |
| 481 | GatherExtents(new_filename, operation.mutable_dst_extents())); |
| 482 | operation.set_dst_length(new_data.size()); |
| 483 | |
| 484 | out_data->swap(data); |
| 485 | *out_op = operation; |
| 486 | |
| 487 | return true; |
| 488 | } |
| 489 | |
| 490 | void DeltaDiffGenerator::SubstituteBlocks( |
| 491 | DeltaArchiveManifest_InstallOperation* op, |
| 492 | const vector<Extent>& remove_extents, |
| 493 | const vector<Extent>& replace_extents) { |
| 494 | // First, expand out the blocks that op reads from |
| 495 | vector<uint64> read_blocks; |
| 496 | for (int i = 0; i < op->src_extents_size(); i++) { |
| 497 | const Extent& extent = op->src_extents(i); |
| 498 | if (extent.start_block() == kSparseHole) { |
| 499 | read_blocks.resize(read_blocks.size() + extent.num_blocks(), kSparseHole); |
| 500 | } else { |
| 501 | for (uint64 block = extent.start_block(); |
| 502 | block < (extent.start_block() + extent.num_blocks()); block++) { |
| 503 | read_blocks.push_back(block); |
| 504 | } |
| 505 | } |
| 506 | } |
| 507 | { |
| 508 | // Expand remove_extents and replace_extents |
| 509 | vector<uint64> remove_extents_expanded; |
| 510 | for (vector<Extent>::const_iterator it = remove_extents.begin(); |
| 511 | it != remove_extents.end(); ++it) { |
| 512 | const Extent& extent = *it; |
| 513 | for (uint64 block = extent.start_block(); |
| 514 | block < (extent.start_block() + extent.num_blocks()); block++) { |
| 515 | remove_extents_expanded.push_back(block); |
| 516 | } |
| 517 | } |
| 518 | vector<uint64> replace_extents_expanded; |
| 519 | for (vector<Extent>::const_iterator it = replace_extents.begin(); |
| 520 | it != replace_extents.end(); ++it) { |
| 521 | const Extent& extent = *it; |
| 522 | for (uint64 block = extent.start_block(); |
| 523 | block < (extent.start_block() + extent.num_blocks()); block++) { |
| 524 | replace_extents_expanded.push_back(block); |
| 525 | } |
| 526 | } |
| 527 | CHECK_EQ(remove_extents_expanded.size(), replace_extents_expanded.size()); |
| 528 | for (vector<uint64>::size_type i = 0; |
| 529 | i < replace_extents_expanded.size(); i++) { |
| 530 | vector<uint64>::size_type index = 0; |
| 531 | CHECK(utils::VectorIndexOf(read_blocks, |
| 532 | remove_extents_expanded[i], |
| 533 | &index)); |
| 534 | CHECK(read_blocks[index] == remove_extents_expanded[i]); |
| 535 | read_blocks[index] = replace_extents_expanded[i]; |
| 536 | } |
| 537 | } |
| 538 | // Convert read_blocks back to extents |
| 539 | op->clear_src_extents(); |
| 540 | vector<Extent> new_extents; |
| 541 | for (vector<uint64>::const_iterator it = read_blocks.begin(); |
| 542 | it != read_blocks.end(); ++it) { |
| 543 | graph_utils::AppendBlockToExtents(&new_extents, *it); |
| 544 | } |
| 545 | DeltaDiffGenerator::StoreExtents(new_extents, op->mutable_src_extents()); |
| 546 | } |
| 547 | |
| 548 | bool DeltaDiffGenerator::CutEdges(Graph* graph, |
| 549 | const vector<Block>& blocks, |
| 550 | const set<Edge>& edges) { |
| 551 | // First, find enough scratch space for the edges we'll be cutting. |
| 552 | vector<Block>::size_type blocks_required = 0; |
| 553 | for (set<Edge>::const_iterator it = edges.begin(); it != edges.end(); ++it) { |
| 554 | blocks_required += graph_utils::EdgeWeight(*graph, *it); |
| 555 | } |
| 556 | vector<Extent> scratch_extents; |
| 557 | LOG(INFO) << "requesting " << blocks_required << " blocks of scratch"; |
| 558 | TEST_AND_RETURN_FALSE( |
| 559 | FindScratchSpace(blocks, blocks_required, &scratch_extents)); |
| 560 | LinearExtentAllocator scratch_allocator(scratch_extents); |
| 561 | |
| 562 | uint64_t scratch_blocks_used = 0; |
| 563 | for (set<Edge>::const_iterator it = edges.begin(); |
| 564 | it != edges.end(); ++it) { |
| 565 | vector<Extent> old_extents = |
| 566 | (*graph)[it->first].out_edges[it->second].extents; |
| 567 | // Choose some scratch space |
| 568 | scratch_blocks_used += graph_utils::EdgeWeight(*graph, *it); |
| 569 | LOG(INFO) << "using " << graph_utils::EdgeWeight(*graph, *it) |
| 570 | << " scratch blocks (" |
| 571 | << scratch_blocks_used << ")"; |
| 572 | vector<Extent> scratch = |
| 573 | scratch_allocator.Allocate(graph_utils::EdgeWeight(*graph, *it)); |
| 574 | // create vertex to copy original->scratch |
| 575 | graph->resize(graph->size() + 1); |
| 576 | |
| 577 | // make node depend on the copy operation |
| 578 | (*graph)[it->first].out_edges.insert(make_pair(graph->size() - 1, |
| 579 | EdgeProperties())); |
| 580 | |
| 581 | // Set src/dst extents and other proto variables for copy operation |
| 582 | graph->back().op.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE); |
| 583 | DeltaDiffGenerator::StoreExtents( |
| 584 | (*graph)[it->first].out_edges[it->second].extents, |
| 585 | graph->back().op.mutable_src_extents()); |
| 586 | DeltaDiffGenerator::StoreExtents(scratch, |
| 587 | graph->back().op.mutable_dst_extents()); |
| 588 | graph->back().op.set_src_length( |
| 589 | graph_utils::EdgeWeight(*graph, *it) * kBlockSize); |
| 590 | graph->back().op.set_dst_length(graph->back().op.src_length()); |
| 591 | |
| 592 | // make the dest node read from the scratch space |
| 593 | DeltaDiffGenerator::SubstituteBlocks( |
| 594 | &((*graph)[it->second].op), |
| 595 | (*graph)[it->first].out_edges[it->second].extents, |
| 596 | scratch); |
| 597 | |
| 598 | // delete the old edge |
| 599 | CHECK_EQ(1, (*graph)[it->first].out_edges.erase(it->second)); |
| 600 | } |
| 601 | return true; |
| 602 | } |
| 603 | |
| 604 | // Stores all Extents in 'extents' into 'out'. |
| 605 | void DeltaDiffGenerator::StoreExtents( |
| 606 | vector<Extent>& extents, |
| 607 | google::protobuf::RepeatedPtrField<Extent>* out) { |
| 608 | for (vector<Extent>::const_iterator it = extents.begin(); |
| 609 | it != extents.end(); ++it) { |
| 610 | Extent* new_extent = out->Add(); |
| 611 | *new_extent = *it; |
| 612 | } |
| 613 | } |
| 614 | |
| 615 | // Creates all the edges for the graph. Writers of a block point to |
| 616 | // readers of the same block. This is because for an edge A->B, B |
| 617 | // must complete before A executes. |
| 618 | void DeltaDiffGenerator::CreateEdges(Graph* graph, |
| 619 | const vector<Block>& blocks) { |
| 620 | for (vector<Block>::size_type i = 0; i < blocks.size(); i++) { |
| 621 | // Blocks with both a reader and writer get an edge |
| 622 | if (blocks[i].reader == Vertex::kInvalidIndex || |
| 623 | blocks[i].writer == Vertex::kInvalidIndex) |
| 624 | continue; |
| 625 | // Don't have a node depend on itself |
| 626 | if (blocks[i].reader == blocks[i].writer) |
| 627 | continue; |
| 628 | // See if there's already an edge we can add onto |
| 629 | Vertex::EdgeMap::iterator edge_it = |
| 630 | (*graph)[blocks[i].writer].out_edges.find(blocks[i].reader); |
| 631 | if (edge_it == (*graph)[blocks[i].writer].out_edges.end()) { |
| 632 | // No existing edge. Create one |
| 633 | (*graph)[blocks[i].writer].out_edges.insert( |
| 634 | make_pair(blocks[i].reader, EdgeProperties())); |
| 635 | edge_it = (*graph)[blocks[i].writer].out_edges.find(blocks[i].reader); |
| 636 | CHECK_NE(edge_it, (*graph)[blocks[i].writer].out_edges.end()); |
| 637 | } |
| 638 | graph_utils::AppendBlockToExtents(&edge_it->second.extents, i); |
| 639 | } |
| 640 | } |
| 641 | |
| 642 | bool DeltaDiffGenerator::ReorderDataBlobs( |
| 643 | DeltaArchiveManifest* manifest, |
| 644 | const std::string& data_blobs_path, |
| 645 | const std::string& new_data_blobs_path) { |
| 646 | int in_fd = open(data_blobs_path.c_str(), O_RDONLY, 0); |
| 647 | TEST_AND_RETURN_FALSE_ERRNO(in_fd >= 0); |
| 648 | ScopedFdCloser in_fd_closer(&in_fd); |
| 649 | |
| 650 | DirectFileWriter writer; |
| 651 | TEST_AND_RETURN_FALSE( |
| 652 | writer.Open(new_data_blobs_path.c_str(), |
| 653 | O_WRONLY | O_TRUNC | O_CREAT, |
| 654 | 0644) == 0); |
| 655 | ScopedFileWriterCloser writer_closer(&writer); |
| 656 | uint64 out_file_size = 0; |
| 657 | |
| 658 | for (int i = 0; i < manifest->install_operations_size(); i++) { |
| 659 | DeltaArchiveManifest_InstallOperation* op = |
| 660 | manifest->mutable_install_operations(i); |
| 661 | if (!op->has_data_offset()) |
| 662 | continue; |
| 663 | CHECK(op->has_data_length()); |
| 664 | vector<char> buf(op->data_length()); |
| 665 | ssize_t rc = pread(in_fd, &buf[0], buf.size(), op->data_offset()); |
| 666 | TEST_AND_RETURN_FALSE(rc == static_cast<ssize_t>(buf.size())); |
| 667 | |
| 668 | op->set_data_offset(out_file_size); |
| 669 | TEST_AND_RETURN_FALSE(writer.Write(&buf[0], buf.size()) == |
| 670 | static_cast<ssize_t>(buf.size())); |
| 671 | out_file_size += buf.size(); |
| 672 | } |
| 673 | return true; |
| 674 | } |
| 675 | |
| 676 | bool DeltaDiffGenerator::GenerateDeltaUpdateFile(const string& old_root, |
| 677 | const string& old_image, |
| 678 | const string& new_root, |
| 679 | const string& new_image, |
| 680 | const string& output_path) { |
| 681 | struct stat old_image_stbuf; |
| 682 | TEST_AND_RETURN_FALSE_ERRNO(stat(old_image.c_str(), &old_image_stbuf) == 0); |
| 683 | struct stat new_image_stbuf; |
| 684 | TEST_AND_RETURN_FALSE_ERRNO(stat(new_image.c_str(), &new_image_stbuf) == 0); |
| 685 | LOG_IF(WARNING, new_image_stbuf.st_size != old_image_stbuf.st_size) |
| 686 | << "Old and new images are different sizes."; |
| 687 | LOG_IF(FATAL, new_image_stbuf.st_size % kBlockSize) |
| 688 | << "New image not a multiple of block size " << kBlockSize; |
| 689 | LOG_IF(FATAL, old_image_stbuf.st_size % kBlockSize) |
| 690 | << "Old image not a multiple of block size " << kBlockSize; |
| 691 | |
| 692 | vector<Block> blocks(min(old_image_stbuf.st_size / kBlockSize, |
| 693 | new_image_stbuf.st_size / kBlockSize)); |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 694 | LOG(INFO) << "w:" << blocks[4097].writer; |
| 695 | LOG(INFO) << "invalid: " << Vertex::kInvalidIndex; |
| 696 | LOG(INFO) << "len: " << blocks.size(); |
| 697 | for (vector<Block>::size_type i = 0; i < blocks.size(); i++) { |
| 698 | CHECK(blocks[i].reader == Vertex::kInvalidIndex); |
| 699 | CHECK(blocks[i].writer == Vertex::kInvalidIndex); |
| 700 | } |
| 701 | Graph graph; |
| 702 | CheckGraph(graph); |
| 703 | |
| 704 | const string kTempFileTemplate("/tmp/CrAU_temp_data.XXXXXX"); |
| 705 | string temp_file_path; |
| 706 | off_t data_file_size = 0; |
| 707 | |
| 708 | LOG(INFO) << "Reading files..."; |
| 709 | |
| 710 | DeltaArchiveManifest_InstallOperation final_op; |
| 711 | { |
| 712 | int fd; |
| 713 | TEST_AND_RETURN_FALSE( |
| 714 | utils::MakeTempFile(kTempFileTemplate, &temp_file_path, &fd)); |
| 715 | TEST_AND_RETURN_FALSE(fd >= 0); |
| 716 | ScopedFdCloser fd_closer(&fd); |
| 717 | |
| 718 | TEST_AND_RETURN_FALSE(DeltaReadFiles(&graph, |
| 719 | &blocks, |
| 720 | old_root, |
| 721 | new_root, |
| 722 | fd, |
| 723 | &data_file_size)); |
| 724 | CheckGraph(graph); |
| 725 | |
| 726 | // TODO(adlr): read all the rest of the blocks in |
| 727 | TEST_AND_RETURN_FALSE(ReadUnwrittenBlocks(blocks, |
| 728 | fd, |
| 729 | &data_file_size, |
| 730 | new_image, |
| 731 | &final_op)); |
| 732 | } |
| 733 | CheckGraph(graph); |
| 734 | |
| 735 | LOG(INFO) << "Creating edges..."; |
| 736 | CreateEdges(&graph, blocks); |
| 737 | CheckGraph(graph); |
| 738 | |
| 739 | CycleBreaker cycle_breaker; |
| 740 | LOG(INFO) << "Finding cycles..."; |
| 741 | set<Edge> cut_edges; |
| 742 | cycle_breaker.BreakCycles(graph, &cut_edges); |
| 743 | CheckGraph(graph); |
| 744 | |
| 745 | // Calculate number of scratch blocks needed |
| 746 | |
| 747 | LOG(INFO) << "Cutting cycles..."; |
| 748 | TEST_AND_RETURN_FALSE(CutEdges(&graph, blocks, cut_edges)); |
| 749 | CheckGraph(graph); |
| 750 | |
| 751 | vector<Vertex::Index> final_order; |
| 752 | LOG(INFO) << "Ordering..."; |
| 753 | TopologicalSort(graph, &final_order); |
| 754 | CheckGraph(graph); |
| 755 | |
| 756 | // Convert to protobuf Manifest object |
| 757 | DeltaArchiveManifest manifest; |
| 758 | CheckGraph(graph); |
| 759 | InstallOperationsToManifest(graph, final_order, &manifest); |
| 760 | { |
| 761 | // Write final operation |
| 762 | DeltaArchiveManifest_InstallOperation* op = |
| 763 | manifest.add_install_operations(); |
| 764 | *op = final_op; |
| 765 | CHECK(op->has_type()); |
| 766 | LOG(INFO) << "final op length: " << op->data_length(); |
| 767 | } |
| 768 | CheckGraph(graph); |
| 769 | manifest.set_block_size(kBlockSize); |
| 770 | // TODO(adlr): set checksums |
| 771 | |
| 772 | // Reorder the data blobs with the newly ordered manifest |
| 773 | string ordered_blobs_path; |
| 774 | TEST_AND_RETURN_FALSE(utils::MakeTempFile( |
| 775 | "/tmp/CrAU_temp_data.ordered.XXXXXX", |
| 776 | &ordered_blobs_path, |
| 777 | false)); |
| 778 | TEST_AND_RETURN_FALSE(ReorderDataBlobs(&manifest, |
| 779 | temp_file_path, |
| 780 | ordered_blobs_path)); |
| 781 | |
| 782 | // Check that install op blobs are in order and that all blocks are written. |
| 783 | { |
| 784 | vector<uint32> written_count(blocks.size(), 0); |
| 785 | uint64 next_blob_offset = 0; |
| 786 | for (int i = 0; i < manifest.install_operations_size(); i++) { |
| 787 | const DeltaArchiveManifest_InstallOperation& op = |
| 788 | manifest.install_operations(i); |
| 789 | for (int j = 0; j < op.dst_extents_size(); j++) { |
| 790 | const Extent& extent = op.dst_extents(j); |
| 791 | for (uint64 block = extent.start_block(); |
| 792 | block < (extent.start_block() + extent.num_blocks()); block++) { |
| 793 | written_count[block]++; |
| 794 | } |
| 795 | } |
| 796 | if (op.has_data_offset()) { |
| 797 | if (op.data_offset() != next_blob_offset) { |
| 798 | LOG(FATAL) << "bad blob offset! " << op.data_offset() << " != " |
| 799 | << next_blob_offset; |
| 800 | } |
| 801 | next_blob_offset += op.data_length(); |
| 802 | } |
| 803 | } |
| 804 | // check all blocks written to |
| 805 | for (vector<uint32>::size_type i = 0; i < written_count.size(); i++) { |
| 806 | if (written_count[i] == 0) { |
| 807 | LOG(FATAL) << "block " << i << " not written!"; |
| 808 | } |
| 809 | } |
| 810 | } |
| 811 | |
| 812 | // Serialize protobuf |
| 813 | string serialized_manifest; |
| 814 | |
| 815 | CheckGraph(graph); |
| 816 | TEST_AND_RETURN_FALSE(manifest.AppendToString(&serialized_manifest)); |
| 817 | CheckGraph(graph); |
| 818 | |
| 819 | LOG(INFO) << "Writing final delta file header..."; |
| 820 | DirectFileWriter writer; |
| 821 | TEST_AND_RETURN_FALSE_ERRNO(writer.Open(output_path.c_str(), |
| 822 | O_WRONLY | O_CREAT | O_TRUNC, |
| 823 | 0644) == 0); |
| 824 | ScopedFileWriterCloser writer_closer(&writer); |
| 825 | |
| 826 | // Write header |
| 827 | TEST_AND_RETURN_FALSE(writer.Write(kDeltaMagic, strlen(kDeltaMagic)) == |
Andrew de los Reyes | 08c4e27 | 2010-04-15 14:02:17 -0700 | [diff] [blame^] | 828 | static_cast<ssize_t>(strlen(kDeltaMagic))); |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 829 | |
| 830 | // Write version number |
| 831 | TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer, kVersionNumber)); |
| 832 | |
| 833 | // Write protobuf length |
| 834 | TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer, |
| 835 | serialized_manifest.size())); |
| 836 | |
| 837 | // Write protobuf |
| 838 | LOG(INFO) << "Writing final delta file protobuf... " |
| 839 | << serialized_manifest.size(); |
| 840 | TEST_AND_RETURN_FALSE(writer.Write(serialized_manifest.data(), |
| 841 | serialized_manifest.size()) == |
| 842 | static_cast<ssize_t>(serialized_manifest.size())); |
| 843 | |
| 844 | // Append the data blobs |
| 845 | LOG(INFO) << "Writing final delta file data blobs..."; |
| 846 | int blobs_fd = open(temp_file_path.c_str(), O_RDONLY, 0); |
| 847 | ScopedFdCloser blobs_fd_closer(&blobs_fd); |
| 848 | TEST_AND_RETURN_FALSE(blobs_fd >= 0); |
| 849 | for (;;) { |
| 850 | char buf[kBlockSize]; |
| 851 | ssize_t rc = read(blobs_fd, buf, sizeof(buf)); |
| 852 | if (0 == rc) { |
| 853 | // EOF |
| 854 | break; |
| 855 | } |
| 856 | TEST_AND_RETURN_FALSE_ERRNO(rc > 0); |
| 857 | TEST_AND_RETURN_FALSE(writer.Write(buf, rc) == rc); |
| 858 | } |
| 859 | |
| 860 | LOG(INFO) << "All done. Successfully created delta file."; |
| 861 | return true; |
| 862 | } |
| 863 | |
| 864 | }; // namespace chromeos_update_engine |