blob: 676629aa03658b7490bd2e9430245d64ae281a4d [file] [log] [blame]
Darin Petkovc0b7a532010-09-29 15:18:14 -07001// Copyright (c) 2010 The Chromium OS Authors. All rights reserved.
adlr@google.com3defe6a2009-12-04 20:57:17 +00002// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "update_engine/delta_diff_generator.h"
Darin Petkov880335c2010-10-01 15:52:53 -07006
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07007#include <errno.h>
8#include <fcntl.h>
Darin Petkov880335c2010-10-01 15:52:53 -07009#include <sys/stat.h>
10#include <sys/types.h>
11
Andrew de los Reyesb10320d2010-03-31 16:44:44 -070012#include <algorithm>
Andrew de los Reyesef017552010-10-06 17:57:52 -070013#include <map>
Andrew de los Reyesb10320d2010-03-31 16:44:44 -070014#include <set>
15#include <string>
16#include <utility>
17#include <vector>
Darin Petkov880335c2010-10-01 15:52:53 -070018
19#include <base/logging.h>
20#include <base/string_util.h>
Andrew de los Reyesb10320d2010-03-31 16:44:44 -070021#include <bzlib.h>
Darin Petkov880335c2010-10-01 15:52:53 -070022
Andrew de los Reyesb10320d2010-03-31 16:44:44 -070023#include "update_engine/bzip.h"
24#include "update_engine/cycle_breaker.h"
25#include "update_engine/extent_mapper.h"
Andrew de los Reyesef017552010-10-06 17:57:52 -070026#include "update_engine/extent_ranges.h"
Andrew de los Reyesb10320d2010-03-31 16:44:44 -070027#include "update_engine/file_writer.h"
28#include "update_engine/filesystem_iterator.h"
29#include "update_engine/graph_types.h"
30#include "update_engine/graph_utils.h"
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -070031#include "update_engine/payload_signer.h"
Andrew de los Reyesb10320d2010-03-31 16:44:44 -070032#include "update_engine/subprocess.h"
33#include "update_engine/topological_sort.h"
34#include "update_engine/update_metadata.pb.h"
35#include "update_engine/utils.h"
36
37using std::make_pair;
Andrew de los Reyesef017552010-10-06 17:57:52 -070038using std::map;
Andrew de los Reyes3270f742010-07-15 22:28:14 -070039using std::max;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -070040using std::min;
41using std::set;
42using std::string;
43using std::vector;
44
45namespace chromeos_update_engine {
46
47typedef DeltaDiffGenerator::Block Block;
48
49namespace {
50const size_t kBlockSize = 4096;
Darin Petkovc0b7a532010-09-29 15:18:14 -070051const size_t kRootFSPartitionSize = 1 * 1024 * 1024 * 1024; // 1 GiB
Andrew de los Reyes09e56d62010-04-23 13:45:53 -070052const uint64_t kVersionNumber = 1;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -070053
54// Stores all Extents for a file into 'out'. Returns true on success.
55bool GatherExtents(const string& path,
56 google::protobuf::RepeatedPtrField<Extent>* out) {
57 vector<Extent> extents;
58 TEST_AND_RETURN_FALSE(extent_mapper::ExtentsForFileFibmap(path, &extents));
59 DeltaDiffGenerator::StoreExtents(extents, out);
60 return true;
61}
62
63// Runs the bsdiff tool on two files and returns the resulting delta in
64// 'out'. Returns true on success.
65bool BsdiffFiles(const string& old_file,
66 const string& new_file,
67 vector<char>* out) {
68 const string kPatchFile = "/tmp/delta.patchXXXXXX";
69 string patch_file_path;
70
71 TEST_AND_RETURN_FALSE(
72 utils::MakeTempFile(kPatchFile, &patch_file_path, NULL));
73
74 vector<string> cmd;
75 cmd.push_back(kBsdiffPath);
76 cmd.push_back(old_file);
77 cmd.push_back(new_file);
78 cmd.push_back(patch_file_path);
79
80 int rc = 1;
81 vector<char> patch_file;
82 TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc));
83 TEST_AND_RETURN_FALSE(rc == 0);
84 TEST_AND_RETURN_FALSE(utils::ReadFile(patch_file_path, out));
85 unlink(patch_file_path.c_str());
86 return true;
87}
88
89// The blocks vector contains a reader and writer for each block on the
90// filesystem that's being in-place updated. We populate the reader/writer
91// fields of blocks by calling this function.
92// For each block in 'operation' that is read or written, find that block
93// in 'blocks' and set the reader/writer field to the vertex passed.
94// 'graph' is not strictly necessary, but useful for printing out
95// error messages.
96bool AddInstallOpToBlocksVector(
97 const DeltaArchiveManifest_InstallOperation& operation,
98 vector<Block>* blocks,
99 const Graph& graph,
100 Vertex::Index vertex) {
101 LOG(INFO) << "AddInstallOpToBlocksVector(" << vertex << "), "
102 << graph[vertex].file_name;
103 // See if this is already present.
104 TEST_AND_RETURN_FALSE(operation.dst_extents_size() > 0);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700105
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700106 enum BlockField { READER = 0, WRITER, BLOCK_FIELD_COUNT };
107 for (int field = READER; field < BLOCK_FIELD_COUNT; field++) {
108 const int extents_size =
109 (field == READER) ? operation.src_extents_size() :
110 operation.dst_extents_size();
111 const char* past_participle = (field == READER) ? "read" : "written";
112 const google::protobuf::RepeatedPtrField<Extent>& extents =
113 (field == READER) ? operation.src_extents() : operation.dst_extents();
114 Vertex::Index Block::*access_type =
115 (field == READER) ? &Block::reader : &Block::writer;
116
117 for (int i = 0; i < extents_size; i++) {
118 const Extent& extent = extents.Get(i);
119 if (extent.start_block() == kSparseHole) {
120 // Hole in sparse file. skip
121 continue;
122 }
123 for (uint64_t block = extent.start_block();
124 block < (extent.start_block() + extent.num_blocks()); block++) {
125 LOG(INFO) << "ext: " << i << " block: " << block;
126 if ((*blocks)[block].*access_type != Vertex::kInvalidIndex) {
127 LOG(FATAL) << "Block " << block << " is already "
128 << past_participle << " by "
129 << (*blocks)[block].*access_type << "("
130 << graph[(*blocks)[block].*access_type].file_name
131 << ") and also " << vertex << "("
132 << graph[vertex].file_name << ")";
133 }
134 (*blocks)[block].*access_type = vertex;
135 }
136 }
137 }
138 return true;
139}
140
Andrew de los Reyesef017552010-10-06 17:57:52 -0700141// For a given regular file which must exist at new_root + path, and
142// may exist at old_root + path, creates a new InstallOperation and
143// adds it to the graph. Also, populates the |blocks| array as
144// necessary, if |blocks| is non-NULL. Also, writes the data
145// necessary to send the file down to the client into data_fd, which
146// has length *data_file_size. *data_file_size is updated
147// appropriately. If |existing_vertex| is no kInvalidIndex, use that
148// rather than allocating a new vertex. Returns true on success.
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700149bool DeltaReadFile(Graph* graph,
Andrew de los Reyesef017552010-10-06 17:57:52 -0700150 Vertex::Index existing_vertex,
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700151 vector<Block>* blocks,
152 const string& old_root,
153 const string& new_root,
154 const string& path, // within new_root
155 int data_fd,
156 off_t* data_file_size) {
157 vector<char> data;
158 DeltaArchiveManifest_InstallOperation operation;
159
160 TEST_AND_RETURN_FALSE(DeltaDiffGenerator::ReadFileToDiff(old_root + path,
161 new_root + path,
162 &data,
163 &operation));
164
165 // Write the data
166 if (operation.type() != DeltaArchiveManifest_InstallOperation_Type_MOVE) {
167 operation.set_data_offset(*data_file_size);
168 operation.set_data_length(data.size());
169 }
170
171 TEST_AND_RETURN_FALSE(utils::WriteAll(data_fd, &data[0], data.size()));
172 *data_file_size += data.size();
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700173
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700174 // Now, insert into graph and blocks vector
Andrew de los Reyesef017552010-10-06 17:57:52 -0700175 Vertex::Index vertex = existing_vertex;
176 if (vertex == Vertex::kInvalidIndex) {
177 graph->resize(graph->size() + 1);
178 vertex = graph->size() - 1;
179 }
180 (*graph)[vertex].op = operation;
181 CHECK((*graph)[vertex].op.has_type());
182 (*graph)[vertex].file_name = path;
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700183
Andrew de los Reyesef017552010-10-06 17:57:52 -0700184 if (blocks)
185 TEST_AND_RETURN_FALSE(AddInstallOpToBlocksVector((*graph)[vertex].op,
186 blocks,
187 *graph,
188 vertex));
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700189 return true;
190}
191
192// For each regular file within new_root, creates a node in the graph,
193// determines the best way to compress it (REPLACE, REPLACE_BZ, COPY, BSDIFF),
194// and writes any necessary data to the end of data_fd.
195bool DeltaReadFiles(Graph* graph,
196 vector<Block>* blocks,
197 const string& old_root,
198 const string& new_root,
199 int data_fd,
200 off_t* data_file_size) {
201 set<ino_t> visited_inodes;
202 for (FilesystemIterator fs_iter(new_root,
203 utils::SetWithValue<string>("/lost+found"));
204 !fs_iter.IsEnd(); fs_iter.Increment()) {
205 if (!S_ISREG(fs_iter.GetStat().st_mode))
206 continue;
207
208 // Make sure we visit each inode only once.
209 if (utils::SetContainsKey(visited_inodes, fs_iter.GetStat().st_ino))
210 continue;
211 visited_inodes.insert(fs_iter.GetStat().st_ino);
212 if (fs_iter.GetStat().st_size == 0)
213 continue;
214
215 LOG(INFO) << "Encoding file " << fs_iter.GetPartialPath();
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700216
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700217 TEST_AND_RETURN_FALSE(DeltaReadFile(graph,
Andrew de los Reyesef017552010-10-06 17:57:52 -0700218 Vertex::kInvalidIndex,
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700219 blocks,
220 old_root,
221 new_root,
222 fs_iter.GetPartialPath(),
223 data_fd,
224 data_file_size));
225 }
226 return true;
227}
228
Andrew de los Reyesef017552010-10-06 17:57:52 -0700229// This class allocates non-existent temp blocks, starting from
230// kTempBlockStart. Other code is responsible for converting these
231// temp blocks into real blocks, as the client can't read or write to
232// these blocks.
233class DummyExtentAllocator {
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700234 public:
Andrew de los Reyesef017552010-10-06 17:57:52 -0700235 explicit DummyExtentAllocator()
236 : next_block_(kTempBlockStart) {}
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700237 vector<Extent> Allocate(const uint64_t block_count) {
Andrew de los Reyesef017552010-10-06 17:57:52 -0700238 vector<Extent> ret(1);
239 ret[0].set_start_block(next_block_);
240 ret[0].set_num_blocks(block_count);
241 next_block_ += block_count;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700242 return ret;
243 }
244 private:
Andrew de los Reyesef017552010-10-06 17:57:52 -0700245 uint64_t next_block_;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700246};
247
248// Reads blocks from image_path that are not yet marked as being written
249// in the blocks array. These blocks that remain are non-file-data blocks.
250// In the future we might consider intelligent diffing between this data
251// and data in the previous image, but for now we just bzip2 compress it
252// and include it in the update.
253// Creates a new node in the graph to write these blocks and writes the
254// appropriate blob to blobs_fd. Reads and updates blobs_length;
255bool ReadUnwrittenBlocks(const vector<Block>& blocks,
256 int blobs_fd,
257 off_t* blobs_length,
258 const string& image_path,
Andrew de los Reyesef017552010-10-06 17:57:52 -0700259 Vertex* vertex) {
260 DeltaArchiveManifest_InstallOperation* out_op = &vertex->op;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700261 int image_fd = open(image_path.c_str(), O_RDONLY, 000);
262 TEST_AND_RETURN_FALSE_ERRNO(image_fd >= 0);
263 ScopedFdCloser image_fd_closer(&image_fd);
264
265 string temp_file_path;
266 TEST_AND_RETURN_FALSE(utils::MakeTempFile("/tmp/CrAU_temp_data.XXXXXX",
267 &temp_file_path,
268 NULL));
269
270 FILE* file = fopen(temp_file_path.c_str(), "w");
271 TEST_AND_RETURN_FALSE(file);
272 int err = BZ_OK;
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700273
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700274 BZFILE* bz_file = BZ2_bzWriteOpen(&err,
275 file,
276 9, // max compression
277 0, // verbosity
278 0); // default work factor
279 TEST_AND_RETURN_FALSE(err == BZ_OK);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700280
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700281 vector<Extent> extents;
282 vector<Block>::size_type block_count = 0;
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700283
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700284 LOG(INFO) << "Appending left over blocks to extents";
285 for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
286 if (blocks[i].writer != Vertex::kInvalidIndex)
287 continue;
Andrew de los Reyesef017552010-10-06 17:57:52 -0700288 if (blocks[i].reader != Vertex::kInvalidIndex) {
289 graph_utils::AddReadBeforeDep(vertex, blocks[i].reader, i);
290 }
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700291 graph_utils::AppendBlockToExtents(&extents, i);
292 block_count++;
293 }
294
295 // Code will handle 'buf' at any size that's a multiple of kBlockSize,
296 // so we arbitrarily set it to 1024 * kBlockSize.
297 vector<char> buf(1024 * kBlockSize);
298
299 LOG(INFO) << "Reading left over blocks";
300 vector<Block>::size_type blocks_copied_count = 0;
301
302 // For each extent in extents, write the data into BZ2_bzWrite which
303 // sends it to an output file.
304 // We use the temporary buffer 'buf' to hold the data, which may be
305 // smaller than the extent, so in that case we have to loop to get
306 // the extent's data (that's the inner while loop).
307 for (vector<Extent>::const_iterator it = extents.begin();
308 it != extents.end(); ++it) {
309 vector<Block>::size_type blocks_read = 0;
310 while (blocks_read < it->num_blocks()) {
311 const int copy_block_cnt =
312 min(buf.size() / kBlockSize,
313 static_cast<vector<char>::size_type>(
314 it->num_blocks() - blocks_read));
315 ssize_t rc = pread(image_fd,
316 &buf[0],
317 copy_block_cnt * kBlockSize,
318 (it->start_block() + blocks_read) * kBlockSize);
319 TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
320 TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) ==
321 copy_block_cnt * kBlockSize);
322 BZ2_bzWrite(&err, bz_file, &buf[0], copy_block_cnt * kBlockSize);
323 TEST_AND_RETURN_FALSE(err == BZ_OK);
324 blocks_read += copy_block_cnt;
325 blocks_copied_count += copy_block_cnt;
326 LOG(INFO) << "progress: " << ((float)blocks_copied_count)/block_count;
327 }
328 }
329 BZ2_bzWriteClose(&err, bz_file, 0, NULL, NULL);
330 TEST_AND_RETURN_FALSE(err == BZ_OK);
331 bz_file = NULL;
332 TEST_AND_RETURN_FALSE_ERRNO(0 == fclose(file));
333 file = NULL;
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700334
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700335 vector<char> compressed_data;
336 LOG(INFO) << "Reading compressed data off disk";
337 TEST_AND_RETURN_FALSE(utils::ReadFile(temp_file_path, &compressed_data));
338 TEST_AND_RETURN_FALSE(unlink(temp_file_path.c_str()) == 0);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700339
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700340 // Add node to graph to write these blocks
341 out_op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
342 out_op->set_data_offset(*blobs_length);
343 out_op->set_data_length(compressed_data.size());
Andrew de los Reyesef017552010-10-06 17:57:52 -0700344 LOG(INFO) << "Rootfs non-data blocks compressed take up "
345 << compressed_data.size();
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700346 *blobs_length += compressed_data.size();
347 out_op->set_dst_length(kBlockSize * block_count);
348 DeltaDiffGenerator::StoreExtents(extents, out_op->mutable_dst_extents());
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700349
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700350 TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd,
351 &compressed_data[0],
352 compressed_data.size()));
353 LOG(INFO) << "done with extra blocks";
354 return true;
355}
356
Andrew de los Reyes09e56d62010-04-23 13:45:53 -0700357// Writes the uint64_t passed in in host-endian to the file as big-endian.
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700358// Returns true on success.
Andrew de los Reyes09e56d62010-04-23 13:45:53 -0700359bool WriteUint64AsBigEndian(FileWriter* writer, const uint64_t value) {
360 uint64_t value_be = htobe64(value);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700361 TEST_AND_RETURN_FALSE(writer->Write(&value_be, sizeof(value_be)) ==
362 sizeof(value_be));
363 return true;
364}
365
366// Adds each operation from the graph to the manifest in the order
367// specified by 'order'.
368void InstallOperationsToManifest(
369 const Graph& graph,
370 const vector<Vertex::Index>& order,
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700371 const vector<DeltaArchiveManifest_InstallOperation>& kernel_ops,
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700372 DeltaArchiveManifest* out_manifest) {
373 for (vector<Vertex::Index>::const_iterator it = order.begin();
374 it != order.end(); ++it) {
375 DeltaArchiveManifest_InstallOperation* op =
376 out_manifest->add_install_operations();
377 *op = graph[*it].op;
378 }
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700379 for (vector<DeltaArchiveManifest_InstallOperation>::const_iterator it =
380 kernel_ops.begin(); it != kernel_ops.end(); ++it) {
381 DeltaArchiveManifest_InstallOperation* op =
382 out_manifest->add_kernel_install_operations();
383 *op = *it;
384 }
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700385}
386
387void CheckGraph(const Graph& graph) {
388 for (Graph::const_iterator it = graph.begin(); it != graph.end(); ++it) {
389 CHECK(it->op.has_type());
390 }
391}
392
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700393// Delta compresses a kernel partition new_kernel_part with knowledge of
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700394// the old kernel partition old_kernel_part.
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700395bool DeltaCompressKernelPartition(
396 const string& old_kernel_part,
397 const string& new_kernel_part,
398 vector<DeltaArchiveManifest_InstallOperation>* ops,
399 int blobs_fd,
400 off_t* blobs_length) {
401 // For now, just bsdiff the kernel partition as a whole.
402 // TODO(adlr): Use knowledge of how the kernel partition is laid out
403 // to more efficiently compress it.
404
405 LOG(INFO) << "Delta compressing kernel partition...";
406
407 // Add a new install operation
408 ops->resize(1);
409 DeltaArchiveManifest_InstallOperation* op = &(*ops)[0];
Andrew de los Reyes36f37362010-09-03 09:20:04 -0700410 op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700411 op->set_data_offset(*blobs_length);
412
413 // Do the actual compression
414 vector<char> data;
Andrew de los Reyes36f37362010-09-03 09:20:04 -0700415 TEST_AND_RETURN_FALSE(utils::ReadFile(new_kernel_part, &data));
416 TEST_AND_RETURN_FALSE(!data.empty());
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700417
Andrew de los Reyes36f37362010-09-03 09:20:04 -0700418 vector<char> data_bz;
419 TEST_AND_RETURN_FALSE(BzipCompress(data, &data_bz));
420 CHECK(!data_bz.empty());
421
422 TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd, &data_bz[0], data_bz.size()));
423 *blobs_length += data_bz.size();
424
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700425 off_t new_part_size = utils::FileSize(new_kernel_part);
426 TEST_AND_RETURN_FALSE(new_part_size >= 0);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700427
Andrew de los Reyes36f37362010-09-03 09:20:04 -0700428 op->set_data_length(data_bz.size());
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700429
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700430 op->set_dst_length(new_part_size);
431
Andrew de los Reyes877ca8d2010-09-07 14:42:49 -0700432 // There's a single dest extent
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700433 Extent* dst_extent = op->add_dst_extents();
434 dst_extent->set_start_block(0);
435 dst_extent->set_num_blocks((new_part_size + kBlockSize - 1) / kBlockSize);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700436
Andrew de los Reyes36f37362010-09-03 09:20:04 -0700437 LOG(INFO) << "Done compressing kernel partition.";
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -0700438 return true;
439}
440
Darin Petkov880335c2010-10-01 15:52:53 -0700441struct DeltaObject {
442 DeltaObject(const string& in_name, const int in_type, const off_t in_size)
443 : name(in_name),
444 type(in_type),
445 size(in_size) {}
446 bool operator <(const DeltaObject& object) const {
447 return size < object.size;
448 }
449 string name;
450 int type;
451 off_t size;
452};
453
454static const char* kInstallOperationTypes[] = {
455 "REPLACE",
456 "REPLACE_BZ",
457 "MOVE",
458 "BSDIFF"
459};
460
461void ReportPayloadUsage(const Graph& graph,
462 const DeltaArchiveManifest& manifest) {
463 vector<DeltaObject> objects;
464 off_t total_size = 0;
465
466 // Graph nodes with information about file names.
467 for (Vertex::Index node = 0; node < graph.size(); node++) {
468 objects.push_back(DeltaObject(graph[node].file_name,
469 graph[node].op.type(),
470 graph[node].op.data_length()));
471 total_size += graph[node].op.data_length();
472 }
473
474 // Final rootfs operation writing non-file-data.
475 const DeltaArchiveManifest_InstallOperation& final_op =
476 manifest.install_operations(manifest.install_operations_size() - 1);
477 objects.push_back(DeltaObject("<rootfs-final-operation>",
478 final_op.type(),
479 final_op.data_length()));
480 total_size += final_op.data_length();
481
482 // Kernel install operations.
483 for (int i = 0; i < manifest.kernel_install_operations_size(); ++i) {
484 const DeltaArchiveManifest_InstallOperation& op =
485 manifest.kernel_install_operations(i);
486 objects.push_back(DeltaObject(StringPrintf("<kernel-operation-%d>", i),
487 op.type(),
488 op.data_length()));
489 total_size += op.data_length();
490 }
491
492 std::sort(objects.begin(), objects.end());
493
494 static const char kFormatString[] = "%6.2f%% %10llu %-10s %s\n";
495 for (vector<DeltaObject>::const_iterator it = objects.begin();
496 it != objects.end(); ++it) {
497 const DeltaObject& object = *it;
498 fprintf(stderr, kFormatString,
499 object.size * 100.0 / total_size,
500 object.size,
501 kInstallOperationTypes[object.type],
502 object.name.c_str());
503 }
504 fprintf(stderr, kFormatString, 100.0, total_size, "", "<total>");
505}
506
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700507} // namespace {}
508
509bool DeltaDiffGenerator::ReadFileToDiff(
510 const string& old_filename,
511 const string& new_filename,
512 vector<char>* out_data,
513 DeltaArchiveManifest_InstallOperation* out_op) {
514 // Read new data in
515 vector<char> new_data;
516 TEST_AND_RETURN_FALSE(utils::ReadFile(new_filename, &new_data));
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700517
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700518 TEST_AND_RETURN_FALSE(!new_data.empty());
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700519
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700520 vector<char> new_data_bz;
521 TEST_AND_RETURN_FALSE(BzipCompress(new_data, &new_data_bz));
522 CHECK(!new_data_bz.empty());
523
524 vector<char> data; // Data blob that will be written to delta file.
525
526 DeltaArchiveManifest_InstallOperation operation;
527 size_t current_best_size = 0;
528 if (new_data.size() <= new_data_bz.size()) {
529 operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
530 current_best_size = new_data.size();
531 data = new_data;
532 } else {
533 operation.set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ);
534 current_best_size = new_data_bz.size();
535 data = new_data_bz;
536 }
537
538 // Do we have an original file to consider?
539 struct stat old_stbuf;
540 if (0 != stat(old_filename.c_str(), &old_stbuf)) {
541 // If stat-ing the old file fails, it should be because it doesn't exist.
542 TEST_AND_RETURN_FALSE(errno == ENOTDIR || errno == ENOENT);
543 } else {
544 // Read old data
545 vector<char> old_data;
546 TEST_AND_RETURN_FALSE(utils::ReadFile(old_filename, &old_data));
547 if (old_data == new_data) {
548 // No change in data.
549 operation.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
550 current_best_size = 0;
551 data.clear();
552 } else {
553 // Try bsdiff of old to new data
554 vector<char> bsdiff_delta;
555 TEST_AND_RETURN_FALSE(
556 BsdiffFiles(old_filename, new_filename, &bsdiff_delta));
557 CHECK_GT(bsdiff_delta.size(), 0);
558 if (bsdiff_delta.size() < current_best_size) {
559 operation.set_type(DeltaArchiveManifest_InstallOperation_Type_BSDIFF);
560 current_best_size = bsdiff_delta.size();
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700561
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700562 data = bsdiff_delta;
563 }
564 }
565 }
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700566
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700567 // Set parameters of the operations
568 CHECK_EQ(data.size(), current_best_size);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700569
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700570 if (operation.type() == DeltaArchiveManifest_InstallOperation_Type_MOVE ||
571 operation.type() == DeltaArchiveManifest_InstallOperation_Type_BSDIFF) {
572 TEST_AND_RETURN_FALSE(
573 GatherExtents(old_filename, operation.mutable_src_extents()));
574 operation.set_src_length(old_stbuf.st_size);
575 }
576
577 TEST_AND_RETURN_FALSE(
578 GatherExtents(new_filename, operation.mutable_dst_extents()));
579 operation.set_dst_length(new_data.size());
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700580
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700581 out_data->swap(data);
582 *out_op = operation;
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700583
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700584 return true;
585}
586
Andrew de los Reyesef017552010-10-06 17:57:52 -0700587namespace {
588
589// Takes a collection (vector or RepeatedPtrField) of Extent and
590// returns a vector of the blocks referenced, in order.
591template<typename T>
592vector<uint64_t> ExpandExtents(const T& extents) {
593 vector<uint64_t> ret;
594 for (size_t i = 0, e = static_cast<size_t>(extents.size()); i != e; ++i) {
595 const Extent extent = graph_utils::GetElement(extents, i);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700596 if (extent.start_block() == kSparseHole) {
Andrew de los Reyesef017552010-10-06 17:57:52 -0700597 ret.resize(ret.size() + extent.num_blocks(), kSparseHole);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700598 } else {
Andrew de los Reyes09e56d62010-04-23 13:45:53 -0700599 for (uint64_t block = extent.start_block();
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700600 block < (extent.start_block() + extent.num_blocks()); block++) {
Andrew de los Reyesef017552010-10-06 17:57:52 -0700601 ret.push_back(block);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700602 }
603 }
604 }
Andrew de los Reyesef017552010-10-06 17:57:52 -0700605 return ret;
606}
607
608// Takes a vector of blocks and returns an equivalent vector of Extent
609// objects.
610vector<Extent> CompressExtents(const vector<uint64_t>& blocks) {
611 vector<Extent> new_extents;
612 for (vector<uint64_t>::const_iterator it = blocks.begin(), e = blocks.end();
613 it != e; ++it) {
614 graph_utils::AppendBlockToExtents(&new_extents, *it);
615 }
616 return new_extents;
617}
618
619} // namespace {}
620
621void DeltaDiffGenerator::SubstituteBlocks(
622 Vertex* vertex,
623 const vector<Extent>& remove_extents,
624 const vector<Extent>& replace_extents) {
625 // First, expand out the blocks that op reads from
626 vector<uint64_t> read_blocks = ExpandExtents(vertex->op.src_extents());
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700627 {
628 // Expand remove_extents and replace_extents
Andrew de los Reyesef017552010-10-06 17:57:52 -0700629 vector<uint64_t> remove_extents_expanded =
630 ExpandExtents(remove_extents);
631 vector<uint64_t> replace_extents_expanded =
632 ExpandExtents(replace_extents);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700633 CHECK_EQ(remove_extents_expanded.size(), replace_extents_expanded.size());
Andrew de los Reyesef017552010-10-06 17:57:52 -0700634 map<uint64_t, uint64_t> conversion;
Andrew de los Reyes09e56d62010-04-23 13:45:53 -0700635 for (vector<uint64_t>::size_type i = 0;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700636 i < replace_extents_expanded.size(); i++) {
Andrew de los Reyesef017552010-10-06 17:57:52 -0700637 conversion[remove_extents_expanded[i]] = replace_extents_expanded[i];
638 }
639 utils::ApplyMap(&read_blocks, conversion);
640 for (Vertex::EdgeMap::iterator it = vertex->out_edges.begin(),
641 e = vertex->out_edges.end(); it != e; ++it) {
642 vector<uint64_t> write_before_deps_expanded =
643 ExpandExtents(it->second.write_extents);
644 utils::ApplyMap(&write_before_deps_expanded, conversion);
645 it->second.write_extents = CompressExtents(write_before_deps_expanded);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700646 }
647 }
648 // Convert read_blocks back to extents
Andrew de los Reyesef017552010-10-06 17:57:52 -0700649 vertex->op.clear_src_extents();
650 vector<Extent> new_extents = CompressExtents(read_blocks);
651 DeltaDiffGenerator::StoreExtents(new_extents,
652 vertex->op.mutable_src_extents());
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700653}
654
655bool DeltaDiffGenerator::CutEdges(Graph* graph,
Andrew de los Reyesef017552010-10-06 17:57:52 -0700656 const set<Edge>& edges,
657 vector<CutEdgeVertexes>* out_cuts) {
658 DummyExtentAllocator scratch_allocator;
659 vector<CutEdgeVertexes> cuts;
660 cuts.reserve(edges.size());
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700661
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700662 uint64_t scratch_blocks_used = 0;
663 for (set<Edge>::const_iterator it = edges.begin();
664 it != edges.end(); ++it) {
Andrew de los Reyesef017552010-10-06 17:57:52 -0700665 cuts.resize(cuts.size() + 1);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700666 vector<Extent> old_extents =
667 (*graph)[it->first].out_edges[it->second].extents;
668 // Choose some scratch space
669 scratch_blocks_used += graph_utils::EdgeWeight(*graph, *it);
670 LOG(INFO) << "using " << graph_utils::EdgeWeight(*graph, *it)
671 << " scratch blocks ("
672 << scratch_blocks_used << ")";
Andrew de los Reyesef017552010-10-06 17:57:52 -0700673 cuts.back().tmp_extents =
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700674 scratch_allocator.Allocate(graph_utils::EdgeWeight(*graph, *it));
675 // create vertex to copy original->scratch
Andrew de los Reyesef017552010-10-06 17:57:52 -0700676 cuts.back().new_vertex = graph->size();
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700677 graph->resize(graph->size() + 1);
Andrew de los Reyesef017552010-10-06 17:57:52 -0700678 cuts.back().old_src = it->first;
679 cuts.back().old_dst = it->second;
680
681 EdgeProperties& cut_edge_properties =
682 (*graph)[it->first].out_edges.find(it->second)->second;
683
684 // This should never happen, as we should only be cutting edges between
685 // real file nodes, and write-before relationships are created from
686 // a real file node to a temp copy node:
687 CHECK(cut_edge_properties.write_extents.empty())
688 << "Can't cut edge that has write-before relationship.";
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -0700689
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700690 // make node depend on the copy operation
691 (*graph)[it->first].out_edges.insert(make_pair(graph->size() - 1,
Andrew de los Reyesef017552010-10-06 17:57:52 -0700692 cut_edge_properties));
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700693
694 // Set src/dst extents and other proto variables for copy operation
695 graph->back().op.set_type(DeltaArchiveManifest_InstallOperation_Type_MOVE);
696 DeltaDiffGenerator::StoreExtents(
Andrew de los Reyesef017552010-10-06 17:57:52 -0700697 cut_edge_properties.extents,
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700698 graph->back().op.mutable_src_extents());
Andrew de los Reyesef017552010-10-06 17:57:52 -0700699 DeltaDiffGenerator::StoreExtents(cuts.back().tmp_extents,
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700700 graph->back().op.mutable_dst_extents());
701 graph->back().op.set_src_length(
702 graph_utils::EdgeWeight(*graph, *it) * kBlockSize);
703 graph->back().op.set_dst_length(graph->back().op.src_length());
704
705 // make the dest node read from the scratch space
706 DeltaDiffGenerator::SubstituteBlocks(
Andrew de los Reyesef017552010-10-06 17:57:52 -0700707 &((*graph)[it->second]),
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700708 (*graph)[it->first].out_edges[it->second].extents,
Andrew de los Reyesef017552010-10-06 17:57:52 -0700709 cuts.back().tmp_extents);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700710
711 // delete the old edge
712 CHECK_EQ(1, (*graph)[it->first].out_edges.erase(it->second));
Chris Masone790e62e2010-08-12 10:41:18 -0700713
Andrew de los Reyesd12784c2010-07-26 13:55:14 -0700714 // Add an edge from dst to copy operation
Andrew de los Reyesef017552010-10-06 17:57:52 -0700715 EdgeProperties write_before_edge_properties;
716 write_before_edge_properties.write_extents = cuts.back().tmp_extents;
717 (*graph)[it->second].out_edges.insert(
718 make_pair(graph->size() - 1, write_before_edge_properties));
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700719 }
Andrew de los Reyesef017552010-10-06 17:57:52 -0700720 out_cuts->swap(cuts);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700721 return true;
722}
723
724// Stores all Extents in 'extents' into 'out'.
725void DeltaDiffGenerator::StoreExtents(
Andrew de los Reyesef017552010-10-06 17:57:52 -0700726 const vector<Extent>& extents,
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700727 google::protobuf::RepeatedPtrField<Extent>* out) {
728 for (vector<Extent>::const_iterator it = extents.begin();
729 it != extents.end(); ++it) {
730 Extent* new_extent = out->Add();
731 *new_extent = *it;
732 }
733}
734
735// Creates all the edges for the graph. Writers of a block point to
736// readers of the same block. This is because for an edge A->B, B
737// must complete before A executes.
738void DeltaDiffGenerator::CreateEdges(Graph* graph,
739 const vector<Block>& blocks) {
740 for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
741 // Blocks with both a reader and writer get an edge
742 if (blocks[i].reader == Vertex::kInvalidIndex ||
743 blocks[i].writer == Vertex::kInvalidIndex)
744 continue;
745 // Don't have a node depend on itself
746 if (blocks[i].reader == blocks[i].writer)
747 continue;
748 // See if there's already an edge we can add onto
749 Vertex::EdgeMap::iterator edge_it =
750 (*graph)[blocks[i].writer].out_edges.find(blocks[i].reader);
751 if (edge_it == (*graph)[blocks[i].writer].out_edges.end()) {
752 // No existing edge. Create one
753 (*graph)[blocks[i].writer].out_edges.insert(
754 make_pair(blocks[i].reader, EdgeProperties()));
755 edge_it = (*graph)[blocks[i].writer].out_edges.find(blocks[i].reader);
Chris Masone790e62e2010-08-12 10:41:18 -0700756 CHECK(edge_it != (*graph)[blocks[i].writer].out_edges.end());
Andrew de los Reyesb10320d2010-03-31 16:44:44 -0700757 }
758 graph_utils::AppendBlockToExtents(&edge_it->second.extents, i);
759 }
760}
761
Andrew de los Reyesef017552010-10-06 17:57:52 -0700762namespace {
763
764class SortCutsByTopoOrderLess {
765 public:
766 SortCutsByTopoOrderLess(vector<vector<Vertex::Index>::size_type>& table)
767 : table_(table) {}
768 bool operator()(const CutEdgeVertexes& a, const CutEdgeVertexes& b) {
769 return table_[a.old_dst] < table_[b.old_dst];
770 }
771 private:
772 vector<vector<Vertex::Index>::size_type>& table_;
773};
774
775} // namespace {}
776
777void DeltaDiffGenerator::GenerateReverseTopoOrderMap(
778 vector<Vertex::Index>& op_indexes,
779 vector<vector<Vertex::Index>::size_type>* reverse_op_indexes) {
780 vector<vector<Vertex::Index>::size_type> table(op_indexes.size());
781 for (vector<Vertex::Index>::size_type i = 0, e = op_indexes.size();
782 i != e; ++i) {
783 Vertex::Index node = op_indexes[i];
784 if (table.size() < (node + 1)) {
785 table.resize(node + 1);
786 }
787 table[node] = i;
788 }
789 reverse_op_indexes->swap(table);
790}
791
792void DeltaDiffGenerator::SortCutsByTopoOrder(vector<Vertex::Index>& op_indexes,
793 vector<CutEdgeVertexes>* cuts) {
794 // first, make a reverse lookup table.
795 vector<vector<Vertex::Index>::size_type> table;
796 GenerateReverseTopoOrderMap(op_indexes, &table);
797 SortCutsByTopoOrderLess less(table);
798 sort(cuts->begin(), cuts->end(), less);
799}
800
801void DeltaDiffGenerator::MoveFullOpsToBack(Graph* graph,
802 vector<Vertex::Index>* op_indexes) {
803 vector<Vertex::Index> ret;
804 vector<Vertex::Index> full_ops;
805 ret.reserve(op_indexes->size());
806 for (vector<Vertex::Index>::size_type i = 0, e = op_indexes->size(); i != e;
807 ++i) {
808 DeltaArchiveManifest_InstallOperation_Type type =
809 (*graph)[(*op_indexes)[i]].op.type();
810 if (type == DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
811 type == DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) {
812 full_ops.push_back((*op_indexes)[i]);
813 } else {
814 ret.push_back((*op_indexes)[i]);
815 }
816 }
817 LOG(INFO) << "Stats: " << full_ops.size() << " full ops out of "
818 << (full_ops.size() + ret.size()) << " total ops.";
819 ret.insert(ret.end(), full_ops.begin(), full_ops.end());
820 op_indexes->swap(ret);
821}
822
823namespace {
824
825template<typename T>
826bool TempBlocksExistInExtents(const T& extents) {
827 for (int i = 0, e = extents.size(); i < e; ++i) {
828 Extent extent = graph_utils::GetElement(extents, i);
829 uint64_t start = extent.start_block();
830 uint64_t num = extent.num_blocks();
831 if (start == kSparseHole)
832 continue;
833 if (start >= kTempBlockStart ||
834 (start + num) >= kTempBlockStart) {
835 LOG(ERROR) << "temp block!";
836 LOG(ERROR) << "start: " << start << ", num: " << num;
837 LOG(ERROR) << "kTempBlockStart: " << kTempBlockStart;
838 LOG(ERROR) << "returning true";
839 return true;
840 }
841 // check for wrap-around, which would be a bug:
842 CHECK(start <= (start + num));
843 }
844 return false;
845}
846
847} // namespace {}
848
849bool DeltaDiffGenerator::AssignTempBlocks(
850 Graph* graph,
851 const string& new_root,
852 int data_fd,
853 off_t* data_file_size,
854 vector<Vertex::Index>* op_indexes,
855 vector<vector<Vertex::Index>::size_type>* reverse_op_indexes,
856 vector<CutEdgeVertexes>& cuts) {
857 CHECK(!cuts.empty());
858 for (vector<CutEdgeVertexes>::size_type i = cuts.size() - 1, e = 0;
859 true ; --i) {
860 LOG(INFO) << "Fixing temp blocks in cut " << i
861 << ": old dst: " << cuts[i].old_dst << " new vertex: "
862 << cuts[i].new_vertex;
863 const uint64_t blocks_needed =
864 graph_utils::BlocksInExtents(cuts[i].tmp_extents);
865 LOG(INFO) << "Scanning for usable blocks (" << blocks_needed << " needed)";
866 // For now, just look for a single op w/ sufficient blocks, not
867 // considering blocks from outgoing read-before deps.
868 Vertex::Index node = cuts[i].old_dst;
869 DeltaArchiveManifest_InstallOperation_Type node_type =
870 (*graph)[node].op.type();
871 if (node_type == DeltaArchiveManifest_InstallOperation_Type_REPLACE ||
872 node_type == DeltaArchiveManifest_InstallOperation_Type_REPLACE_BZ) {
873 LOG(INFO) << "This was already converted to full, so skipping.";
874 // Delete the temp node and pointer to it from old src
875 if (!(*graph)[cuts[i].old_src].out_edges.erase(cuts[i].new_vertex)) {
876 LOG(INFO) << "Odd. node " << cuts[i].old_src << " didn't point to "
877 << cuts[i].new_vertex;
878 }
879 (*graph)[cuts[i].new_vertex].valid = false;
880 vector<Vertex::Index>::size_type new_topo_idx =
881 (*reverse_op_indexes)[cuts[i].new_vertex];
882 op_indexes->erase(op_indexes->begin() + new_topo_idx);
883 GenerateReverseTopoOrderMap(*op_indexes, reverse_op_indexes);
884 continue;
885 }
886 bool found_node = false;
887 for (vector<Vertex::Index>::size_type j = (*reverse_op_indexes)[node] + 1,
888 je = op_indexes->size(); j < je; ++j) {
889 Vertex::Index test_node = (*op_indexes)[j];
890 // See if this node has sufficient blocks
891 ExtentRanges ranges;
892 ranges.AddRepeatedExtents((*graph)[test_node].op.dst_extents());
893 ranges.SubtractExtent(ExtentForRange(
894 kTempBlockStart, kSparseHole - kTempBlockStart));
895 ranges.SubtractRepeatedExtents((*graph)[test_node].op.src_extents());
896 // For now, for simplicity, subtract out all blocks in read-before
897 // dependencies.
898 for (Vertex::EdgeMap::const_iterator edge_i =
899 (*graph)[test_node].out_edges.begin(),
900 edge_e = (*graph)[test_node].out_edges.end();
901 edge_i != edge_e; ++edge_i) {
902 ranges.SubtractExtents(edge_i->second.extents);
903 }
904
905 uint64_t blocks_found = ranges.blocks();
906 if (blocks_found < blocks_needed) {
907 if (blocks_found > 0)
908 LOG(INFO) << "insufficient blocks found in topo node " << j
909 << " (node " << (*op_indexes)[j] << "). Found only "
910 << blocks_found;
911 continue;
912 }
913 found_node = true;
914 LOG(INFO) << "Found sufficient blocks in topo node " << j
915 << " (node " << (*op_indexes)[j] << ")";
916 // Sub in the blocks, and make the node supplying the blocks
917 // depend on old_dst.
918 vector<Extent> real_extents =
919 ranges.GetExtentsForBlockCount(blocks_needed);
920
921 // Fix the old dest node w/ the real blocks
922 SubstituteBlocks(&(*graph)[node],
923 cuts[i].tmp_extents,
924 real_extents);
925
926 // Fix the new node w/ the real blocks. Since the new node is just a
927 // copy operation, we can replace all the dest extents w/ the real
928 // blocks.
929 DeltaArchiveManifest_InstallOperation *op =
930 &(*graph)[cuts[i].new_vertex].op;
931 op->clear_dst_extents();
932 StoreExtents(real_extents, op->mutable_dst_extents());
933
934 // Add an edge from the real-block supplier to the old dest block.
935 graph_utils::AddReadBeforeDepExtents(&(*graph)[test_node],
936 node,
937 real_extents);
938 break;
939 }
940 if (!found_node) {
941 // convert to full op
942 LOG(WARNING) << "Failed to find enough temp blocks for cut " << i
943 << " with old dest (graph node " << node
944 << "). Converting to a full op, at the expense of a "
945 << "good compression ratio.";
946 TEST_AND_RETURN_FALSE(ConvertCutToFullOp(graph,
947 cuts[i],
948 new_root,
949 data_fd,
950 data_file_size));
951 // move the full op to the back
952 vector<Vertex::Index> new_op_indexes;
953 for (vector<Vertex::Index>::const_iterator iter_i = op_indexes->begin(),
954 iter_e = op_indexes->end(); iter_i != iter_e; ++iter_i) {
955 if ((*iter_i == cuts[i].old_dst) || (*iter_i == cuts[i].new_vertex))
956 continue;
957 new_op_indexes.push_back(*iter_i);
958 }
959 new_op_indexes.push_back(cuts[i].old_dst);
960 op_indexes->swap(new_op_indexes);
961
962 GenerateReverseTopoOrderMap(*op_indexes, reverse_op_indexes);
963 }
964 if (i == e) {
965 // break out of for() loop
966 break;
967 }
968 }
969 return true;
970}
971
972bool DeltaDiffGenerator::NoTempBlocksRemain(const Graph& graph) {
973 size_t idx = 0;
974 for (Graph::const_iterator it = graph.begin(), e = graph.end(); it != e;
975 ++it, ++idx) {
976 if (!it->valid)
977 continue;
978 const DeltaArchiveManifest_InstallOperation& op = it->op;
979 if (TempBlocksExistInExtents(op.dst_extents()) ||
980 TempBlocksExistInExtents(op.src_extents())) {
981 LOG(INFO) << "bad extents in node " << idx;
982 LOG(INFO) << "so yeah";
983 return false;
984 }
985
986 // Check out-edges:
987 for (Vertex::EdgeMap::const_iterator jt = it->out_edges.begin(),
988 je = it->out_edges.end(); jt != je; ++jt) {
989 if (TempBlocksExistInExtents(jt->second.extents) ||
990 TempBlocksExistInExtents(jt->second.write_extents)) {
991 LOG(INFO) << "bad out edge in node " << idx;
992 LOG(INFO) << "so yeah";
993 return false;
994 }
995 }
996 }
997 return true;
998}
999
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001000bool DeltaDiffGenerator::ReorderDataBlobs(
1001 DeltaArchiveManifest* manifest,
1002 const std::string& data_blobs_path,
1003 const std::string& new_data_blobs_path) {
1004 int in_fd = open(data_blobs_path.c_str(), O_RDONLY, 0);
1005 TEST_AND_RETURN_FALSE_ERRNO(in_fd >= 0);
1006 ScopedFdCloser in_fd_closer(&in_fd);
Chris Masone790e62e2010-08-12 10:41:18 -07001007
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001008 DirectFileWriter writer;
1009 TEST_AND_RETURN_FALSE(
1010 writer.Open(new_data_blobs_path.c_str(),
1011 O_WRONLY | O_TRUNC | O_CREAT,
1012 0644) == 0);
1013 ScopedFileWriterCloser writer_closer(&writer);
Andrew de los Reyes09e56d62010-04-23 13:45:53 -07001014 uint64_t out_file_size = 0;
Chris Masone790e62e2010-08-12 10:41:18 -07001015
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001016 for (int i = 0; i < (manifest->install_operations_size() +
1017 manifest->kernel_install_operations_size()); i++) {
1018 DeltaArchiveManifest_InstallOperation* op = NULL;
1019 if (i < manifest->install_operations_size()) {
1020 op = manifest->mutable_install_operations(i);
1021 } else {
1022 op = manifest->mutable_kernel_install_operations(
1023 i - manifest->install_operations_size());
1024 }
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001025 if (!op->has_data_offset())
1026 continue;
1027 CHECK(op->has_data_length());
1028 vector<char> buf(op->data_length());
1029 ssize_t rc = pread(in_fd, &buf[0], buf.size(), op->data_offset());
1030 TEST_AND_RETURN_FALSE(rc == static_cast<ssize_t>(buf.size()));
1031
1032 op->set_data_offset(out_file_size);
1033 TEST_AND_RETURN_FALSE(writer.Write(&buf[0], buf.size()) ==
1034 static_cast<ssize_t>(buf.size()));
1035 out_file_size += buf.size();
1036 }
1037 return true;
1038}
1039
Andrew de los Reyesef017552010-10-06 17:57:52 -07001040bool DeltaDiffGenerator::ConvertCutToFullOp(Graph* graph,
1041 const CutEdgeVertexes& cut,
1042 const string& new_root,
1043 int data_fd,
1044 off_t* data_file_size) {
1045 // Drop all incoming edges, keep all outgoing edges
1046
1047 // Keep all outgoing edges
1048 Vertex::EdgeMap out_edges = (*graph)[cut.old_dst].out_edges;
1049 graph_utils::DropWriteBeforeDeps(&out_edges);
1050
1051 TEST_AND_RETURN_FALSE(DeltaReadFile(graph,
1052 cut.old_dst,
1053 NULL,
1054 "/-!@:&*nonexistent_path",
1055 new_root,
1056 (*graph)[cut.old_dst].file_name,
1057 data_fd,
1058 data_file_size));
1059
1060 (*graph)[cut.old_dst].out_edges = out_edges;
1061
1062 // Right now we don't have doubly-linked edges, so we have to scan
1063 // the whole graph.
1064 graph_utils::DropIncomingEdgesTo(graph, cut.old_dst);
1065
1066 // Delete temp node
1067 (*graph)[cut.old_src].out_edges.erase(cut.new_vertex);
1068 CHECK((*graph)[cut.old_dst].out_edges.find(cut.new_vertex) ==
1069 (*graph)[cut.old_dst].out_edges.end());
1070 (*graph)[cut.new_vertex].valid = false;
1071 return true;
1072}
1073
1074bool DeltaDiffGenerator::ConvertGraphToDag(Graph* graph,
1075 const string& new_root,
1076 int fd,
1077 off_t* data_file_size,
1078 vector<Vertex::Index>* final_order) {
1079 CycleBreaker cycle_breaker;
1080 LOG(INFO) << "Finding cycles...";
1081 set<Edge> cut_edges;
1082 cycle_breaker.BreakCycles(*graph, &cut_edges);
1083 LOG(INFO) << "done finding cycles";
1084 CheckGraph(*graph);
1085
1086 // Calculate number of scratch blocks needed
1087
1088 LOG(INFO) << "Cutting cycles...";
1089 vector<CutEdgeVertexes> cuts;
1090 TEST_AND_RETURN_FALSE(CutEdges(graph, cut_edges, &cuts));
1091 LOG(INFO) << "done cutting cycles";
1092 LOG(INFO) << "There are " << cuts.size() << " cuts.";
1093 CheckGraph(*graph);
1094
1095 LOG(INFO) << "Creating initial topological order...";
1096 TopologicalSort(*graph, final_order);
1097 LOG(INFO) << "done with initial topo order";
1098 CheckGraph(*graph);
1099
1100 LOG(INFO) << "Moving full ops to the back";
1101 MoveFullOpsToBack(graph, final_order);
1102 LOG(INFO) << "done moving full ops to back";
1103
1104 vector<vector<Vertex::Index>::size_type> inverse_final_order;
1105 GenerateReverseTopoOrderMap(*final_order, &inverse_final_order);
1106
1107 if (!cuts.empty())
1108 TEST_AND_RETURN_FALSE(AssignTempBlocks(graph,
1109 new_root,
1110 fd,
1111 data_file_size,
1112 final_order,
1113 &inverse_final_order,
1114 cuts));
1115 LOG(INFO) << "Making sure all temp blocks have been allocated";
1116 graph_utils::DumpGraph(*graph);
1117 CHECK(NoTempBlocksRemain(*graph));
1118 LOG(INFO) << "done making sure all temp blocks are allocated";
1119 return true;
1120}
1121
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001122bool DeltaDiffGenerator::GenerateDeltaUpdateFile(
1123 const string& old_root,
1124 const string& old_image,
1125 const string& new_root,
1126 const string& new_image,
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001127 const string& old_kernel_part,
1128 const string& new_kernel_part,
1129 const string& output_path,
1130 const string& private_key_path) {
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001131 struct stat old_image_stbuf;
1132 TEST_AND_RETURN_FALSE_ERRNO(stat(old_image.c_str(), &old_image_stbuf) == 0);
1133 struct stat new_image_stbuf;
1134 TEST_AND_RETURN_FALSE_ERRNO(stat(new_image.c_str(), &new_image_stbuf) == 0);
1135 LOG_IF(WARNING, new_image_stbuf.st_size != old_image_stbuf.st_size)
1136 << "Old and new images are different sizes.";
1137 LOG_IF(FATAL, new_image_stbuf.st_size % kBlockSize)
1138 << "New image not a multiple of block size " << kBlockSize;
1139 LOG_IF(FATAL, old_image_stbuf.st_size % kBlockSize)
1140 << "Old image not a multiple of block size " << kBlockSize;
1141
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001142 // Sanity check kernel partition args
1143 TEST_AND_RETURN_FALSE(utils::FileSize(old_kernel_part) >= 0);
1144 TEST_AND_RETURN_FALSE(utils::FileSize(new_kernel_part) >= 0);
1145
Andrew de los Reyes3270f742010-07-15 22:28:14 -07001146 vector<Block> blocks(max(old_image_stbuf.st_size / kBlockSize,
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001147 new_image_stbuf.st_size / kBlockSize));
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001148 LOG(INFO) << "invalid: " << Vertex::kInvalidIndex;
1149 LOG(INFO) << "len: " << blocks.size();
1150 for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
1151 CHECK(blocks[i].reader == Vertex::kInvalidIndex);
1152 CHECK(blocks[i].writer == Vertex::kInvalidIndex);
1153 }
1154 Graph graph;
1155 CheckGraph(graph);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001156
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001157 const string kTempFileTemplate("/tmp/CrAU_temp_data.XXXXXX");
1158 string temp_file_path;
1159 off_t data_file_size = 0;
1160
1161 LOG(INFO) << "Reading files...";
1162
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001163 vector<DeltaArchiveManifest_InstallOperation> kernel_ops;
1164
Andrew de los Reyesef017552010-10-06 17:57:52 -07001165 vector<Vertex::Index> final_order;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001166 {
1167 int fd;
1168 TEST_AND_RETURN_FALSE(
1169 utils::MakeTempFile(kTempFileTemplate, &temp_file_path, &fd));
1170 TEST_AND_RETURN_FALSE(fd >= 0);
1171 ScopedFdCloser fd_closer(&fd);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001172
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001173 TEST_AND_RETURN_FALSE(DeltaReadFiles(&graph,
1174 &blocks,
1175 old_root,
1176 new_root,
1177 fd,
1178 &data_file_size));
Andrew de los Reyesef017552010-10-06 17:57:52 -07001179 LOG(INFO) << "done reading normal files";
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001180 CheckGraph(graph);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001181
Andrew de los Reyesef017552010-10-06 17:57:52 -07001182 graph.resize(graph.size() + 1);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001183 TEST_AND_RETURN_FALSE(ReadUnwrittenBlocks(blocks,
1184 fd,
1185 &data_file_size,
1186 new_image,
Andrew de los Reyesef017552010-10-06 17:57:52 -07001187 &graph.back()));
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001188
1189 // Read kernel partition
1190 TEST_AND_RETURN_FALSE(DeltaCompressKernelPartition(old_kernel_part,
1191 new_kernel_part,
1192 &kernel_ops,
1193 fd,
1194 &data_file_size));
Andrew de los Reyesef017552010-10-06 17:57:52 -07001195
1196 LOG(INFO) << "done reading kernel";
1197 CheckGraph(graph);
1198
1199 LOG(INFO) << "Creating edges...";
1200 CreateEdges(&graph, blocks);
1201 LOG(INFO) << "Done creating edges";
1202 CheckGraph(graph);
1203
1204 TEST_AND_RETURN_FALSE(ConvertGraphToDag(&graph,
1205 new_root,
1206 fd,
1207 &data_file_size,
1208 &final_order));
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001209 }
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001210
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001211 // Convert to protobuf Manifest object
1212 DeltaArchiveManifest manifest;
1213 CheckGraph(graph);
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001214 InstallOperationsToManifest(graph, final_order, kernel_ops, &manifest);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001215
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001216 CheckGraph(graph);
1217 manifest.set_block_size(kBlockSize);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001218
1219 // Reorder the data blobs with the newly ordered manifest
1220 string ordered_blobs_path;
1221 TEST_AND_RETURN_FALSE(utils::MakeTempFile(
1222 "/tmp/CrAU_temp_data.ordered.XXXXXX",
1223 &ordered_blobs_path,
1224 false));
1225 TEST_AND_RETURN_FALSE(ReorderDataBlobs(&manifest,
1226 temp_file_path,
1227 ordered_blobs_path));
1228
1229 // Check that install op blobs are in order and that all blocks are written.
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001230 uint64_t next_blob_offset = 0;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001231 {
Andrew de los Reyes09e56d62010-04-23 13:45:53 -07001232 vector<uint32_t> written_count(blocks.size(), 0);
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001233 for (int i = 0; i < (manifest.install_operations_size() +
1234 manifest.kernel_install_operations_size()); i++) {
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001235 DeltaArchiveManifest_InstallOperation* op =
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001236 i < manifest.install_operations_size() ?
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001237 manifest.mutable_install_operations(i) :
1238 manifest.mutable_kernel_install_operations(
Andrew de los Reyesf4c7ef12010-04-30 10:37:00 -07001239 i - manifest.install_operations_size());
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001240 for (int j = 0; j < op->dst_extents_size(); j++) {
1241 const Extent& extent = op->dst_extents(j);
Andrew de los Reyes09e56d62010-04-23 13:45:53 -07001242 for (uint64_t block = extent.start_block();
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001243 block < (extent.start_block() + extent.num_blocks()); block++) {
Darin Petkovc0b7a532010-09-29 15:18:14 -07001244 if (block < blocks.size())
1245 written_count[block]++;
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001246 }
1247 }
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001248 if (op->has_data_offset()) {
1249 if (op->data_offset() != next_blob_offset) {
1250 LOG(FATAL) << "bad blob offset! " << op->data_offset() << " != "
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001251 << next_blob_offset;
1252 }
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001253 next_blob_offset += op->data_length();
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001254 }
1255 }
1256 // check all blocks written to
Andrew de los Reyes09e56d62010-04-23 13:45:53 -07001257 for (vector<uint32_t>::size_type i = 0; i < written_count.size(); i++) {
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001258 if (written_count[i] == 0) {
1259 LOG(FATAL) << "block " << i << " not written!";
1260 }
1261 }
1262 }
1263
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001264 // Signatures appear at the end of the blobs. Note the offset in the
1265 // manifest
1266 if (!private_key_path.empty()) {
1267 LOG(INFO) << "Making room for signature in file";
1268 manifest.set_signatures_offset(next_blob_offset);
1269 LOG(INFO) << "set? " << manifest.has_signatures_offset();
1270 // Add a dummy op at the end to appease older clients
1271 DeltaArchiveManifest_InstallOperation* dummy_op =
1272 manifest.add_kernel_install_operations();
1273 dummy_op->set_type(DeltaArchiveManifest_InstallOperation_Type_REPLACE);
1274 dummy_op->set_data_offset(next_blob_offset);
1275 manifest.set_signatures_offset(next_blob_offset);
1276 uint64_t signature_blob_length = 0;
1277 TEST_AND_RETURN_FALSE(
1278 PayloadSigner::SignatureBlobLength(private_key_path,
1279 &signature_blob_length));
1280 dummy_op->set_data_length(signature_blob_length);
1281 manifest.set_signatures_size(signature_blob_length);
1282 Extent* dummy_extent = dummy_op->add_dst_extents();
1283 // Tell the dummy op to write this data to a big sparse hole
1284 dummy_extent->set_start_block(kSparseHole);
1285 dummy_extent->set_num_blocks((signature_blob_length + kBlockSize - 1) /
1286 kBlockSize);
1287 }
1288
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001289 // Serialize protobuf
1290 string serialized_manifest;
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001291
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001292 CheckGraph(graph);
1293 TEST_AND_RETURN_FALSE(manifest.AppendToString(&serialized_manifest));
1294 CheckGraph(graph);
1295
1296 LOG(INFO) << "Writing final delta file header...";
1297 DirectFileWriter writer;
1298 TEST_AND_RETURN_FALSE_ERRNO(writer.Open(output_path.c_str(),
1299 O_WRONLY | O_CREAT | O_TRUNC,
1300 0644) == 0);
1301 ScopedFileWriterCloser writer_closer(&writer);
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001302
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001303 // Write header
1304 TEST_AND_RETURN_FALSE(writer.Write(kDeltaMagic, strlen(kDeltaMagic)) ==
Andrew de los Reyes08c4e272010-04-15 14:02:17 -07001305 static_cast<ssize_t>(strlen(kDeltaMagic)));
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001306
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001307 // Write version number
1308 TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer, kVersionNumber));
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001309
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001310 // Write protobuf length
1311 TEST_AND_RETURN_FALSE(WriteUint64AsBigEndian(&writer,
1312 serialized_manifest.size()));
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001313
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001314 // Write protobuf
1315 LOG(INFO) << "Writing final delta file protobuf... "
1316 << serialized_manifest.size();
1317 TEST_AND_RETURN_FALSE(writer.Write(serialized_manifest.data(),
1318 serialized_manifest.size()) ==
1319 static_cast<ssize_t>(serialized_manifest.size()));
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001320
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001321 // Append the data blobs
1322 LOG(INFO) << "Writing final delta file data blobs...";
Andrew de los Reyes09e56d62010-04-23 13:45:53 -07001323 int blobs_fd = open(ordered_blobs_path.c_str(), O_RDONLY, 0);
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001324 ScopedFdCloser blobs_fd_closer(&blobs_fd);
1325 TEST_AND_RETURN_FALSE(blobs_fd >= 0);
1326 for (;;) {
1327 char buf[kBlockSize];
1328 ssize_t rc = read(blobs_fd, buf, sizeof(buf));
1329 if (0 == rc) {
1330 // EOF
1331 break;
1332 }
1333 TEST_AND_RETURN_FALSE_ERRNO(rc > 0);
1334 TEST_AND_RETURN_FALSE(writer.Write(buf, rc) == rc);
1335 }
Andrew de los Reyes932bc4c2010-08-23 18:14:09 -07001336
1337 // Write signature blob.
1338 if (!private_key_path.empty()) {
1339 LOG(INFO) << "Signing the update...";
1340 vector<char> signature_blob;
1341 TEST_AND_RETURN_FALSE(PayloadSigner::SignPayload(output_path,
1342 private_key_path,
1343 &signature_blob));
1344 TEST_AND_RETURN_FALSE(writer.Write(&signature_blob[0],
1345 signature_blob.size()) ==
1346 static_cast<ssize_t>(signature_blob.size()));
1347 }
1348
Darin Petkov880335c2010-10-01 15:52:53 -07001349 ReportPayloadUsage(graph, manifest);
1350
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001351 LOG(INFO) << "All done. Successfully created delta file.";
1352 return true;
1353}
1354
Andrew de los Reyes09e56d62010-04-23 13:45:53 -07001355const char* const kBsdiffPath = "/usr/bin/bsdiff";
1356const char* const kBspatchPath = "/usr/bin/bspatch";
1357const char* const kDeltaMagic = "CrAU";
1358
Andrew de los Reyesb10320d2010-03-31 16:44:44 -07001359}; // namespace chromeos_update_engine