blob: 45872bc99a3c5c9551ed953d19a989a5a7962197 [file] [log] [blame]
adlr@google.com3defe6a2009-12-04 20:57:17 +00001// Copyright (c) 2009 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "update_engine/delta_diff_generator.h"
6#include <dirent.h>
7#include <endian.h>
8#include <errno.h>
9#include <stdio.h>
10#include <unistd.h>
11#include <algorithm>
12#include <vector>
13#include <tr1/memory>
14#include <zlib.h>
15#include "chromeos/obsolete_logging.h"
16#include "base/scoped_ptr.h"
17#include "update_engine/delta_diff_parser.h"
18#include "update_engine/gzip.h"
19#include "update_engine/subprocess.h"
20#include "update_engine/utils.h"
21
22using std::vector;
23using std::tr1::shared_ptr;
24using chromeos_update_engine::DeltaArchiveManifest;
25
26namespace chromeos_update_engine {
27
28namespace {
29// These structs and methods are helpers for EncodeDataToDeltaFile()
30
31// Before moving the data into a proto buffer, the data is stored in
32// memory in these Node and Child structures.
33
34// Each Node struct represents a file on disk (which can be regular file,
35// directory, fifo, socket, symlink, etc). Nodes that contain children
36// (just directories) will have a vector of Child objects. Each child
37// object has a filename and a pointer to the associated Node. Thus,
38// filenames for files are stored with their parents, not as part of
39// the file itself.
40
41// These structures are easier to work with than the protobuf format
42// when adding files. When generating a delta file, we add an entry
43// for each file to a root Node object. Then, we sort each Node's
44// children vector so the children are stored alphabetically. Then,
45// we assign an index value to the idx field of each Node by a preorder
46// tree traversal. The index value assigned to a Node is the index it
47// will have in the DeltaArchiveManifest protobuf.
48// Finally, we add each Node to a DeltaArchiveManifest protobuf.
49
50struct Node;
51
52struct Child {
53 Child(const string& the_name,
54 Node* the_node)
55 : name(the_name),
56 node(the_node) {}
57 string name;
58 // Use shared_ptr here rather than scoped_ptr b/c this struct will be copied
59 // in stl containers
60 scoped_ptr<Node> node;
61};
62
63// For the C++ sort() function.
64struct ChildLessThan {
65 bool operator()(const shared_ptr<Child>& a, const shared_ptr<Child>& b) {
66 return a->name < b->name;
67 }
68};
69
70struct Node {
71 Node()
72 : mode(0),
73 uid(0),
74 gid(0),
Andrew de los Reyese5733992009-12-08 13:34:00 -080075 nlink(0),
76 inode(0),
adlr@google.com3defe6a2009-12-04 20:57:17 +000077 compressed(false),
78 offset(-1),
79 length(0),
80 idx(0) {}
81
82 mode_t mode;
83 uid_t uid;
84 gid_t gid;
85
Andrew de los Reyese5733992009-12-08 13:34:00 -080086 // a file may be a potential hardlink if it's not a directory
87 // and it has a link count > 1.
88 bool IsPotentialHardlink() const {
89 return !S_ISDIR(mode) && nlink > 1;
90 }
91 nlink_t nlink; // number of hard links
92 ino_t inode;
93
adlr@google.com3defe6a2009-12-04 20:57:17 +000094 // data
95 bool compressed;
96 int offset; // -1 means no data
97 int length;
98
99 vector<shared_ptr<Child> > children;
100 int idx;
101};
102
103// This function sets *node's variables to match what's at path.
104// This includes calling this function recursively on all children. Children
105// not on the same device as the original node will not be considered.
106// Returns true on success.
107bool UpdateNodeFromPath(const string& path, Node* node) {
108 // Set metadata
109 struct stat stbuf;
110 TEST_AND_RETURN_FALSE_ERRNO(lstat(path.c_str(), &stbuf) == 0);
111 const dev_t dev = stbuf.st_dev;
112 node->mode = stbuf.st_mode;
113 node->uid = stbuf.st_uid;
114 node->gid = stbuf.st_gid;
Andrew de los Reyese5733992009-12-08 13:34:00 -0800115 node->nlink = stbuf.st_nlink;
116 node->inode = stbuf.st_ino;
adlr@google.com3defe6a2009-12-04 20:57:17 +0000117 if (!S_ISDIR(node->mode)) {
118 return true;
119 }
120
121 DIR* dir = opendir(path.c_str());
122 TEST_AND_RETURN_FALSE(dir);
123
124 struct dirent entry;
125 struct dirent* dir_entry;
126
127 for (;;) {
128 TEST_AND_RETURN_FALSE_ERRNO(readdir_r(dir, &entry, &dir_entry) == 0);
129 if (!dir_entry) {
130 // done
131 break;
132 }
133 if (!strcmp(".", dir_entry->d_name))
134 continue;
135 if (!strcmp("..", dir_entry->d_name))
136 continue;
137
138 string child_path = path + "/" + dir_entry->d_name;
139 struct stat child_stbuf;
140 TEST_AND_RETURN_FALSE_ERRNO(lstat(child_path.c_str(), &child_stbuf) == 0);
141 // make sure it's on the same dev
142 if (child_stbuf.st_dev != dev)
143 continue;
144 shared_ptr<Child> child(new Child(dir_entry->d_name, new Node));
145 node->children.push_back(child);
146 TEST_AND_RETURN_FALSE(UpdateNodeFromPath(path + "/" + child->name,
147 child->node.get()));
148 }
149 TEST_AND_RETURN_FALSE_ERRNO(closedir(dir) == 0);
150 // Done with all subdirs. sort children.
151 sort(node->children.begin(), node->children.end(), ChildLessThan());
152 return true;
153}
154
155// We go through n setting the index value of each Node to
156// *next_index_value, then increment next_index_value.
157// We then recursively assign index values to children.
158// The first caller should call this with *next_index_value == 0 and
159// the root Node, thus setting the root Node's index to 0.
160void PopulateChildIndexes(Node* n, int* next_index_value) {
161 n->idx = (*next_index_value)++;
162 for (unsigned int i = 0; i < n->children.size(); i++) {
163 PopulateChildIndexes(n->children[i]->node.get(), next_index_value);
164 }
165}
166
167// This converts a Node tree rooted at n into a DeltaArchiveManifest.
Andrew de los Reyese5733992009-12-08 13:34:00 -0800168void NodeToDeltaArchiveManifest(Node* n, DeltaArchiveManifest* archive,
169 map<ino_t, string>* hard_links,
170 const string& path) {
adlr@google.com3defe6a2009-12-04 20:57:17 +0000171 DeltaArchiveManifest_File *f = archive->add_files();
172 f->set_mode(n->mode);
173 f->set_uid(n->uid);
174 f->set_gid(n->gid);
Andrew de los Reyese5733992009-12-08 13:34:00 -0800175 if (utils::MapContainsKey(*hard_links, n->inode)) {
176 // We have a hard link
177 CHECK(!S_ISDIR(n->mode));
178 f->set_hardlink_path((*hard_links)[n->inode]);
179 } else if (n->IsPotentialHardlink()) {
180 (*hard_links)[n->inode] = path;
181 }
adlr@google.com3defe6a2009-12-04 20:57:17 +0000182 if (!S_ISDIR(n->mode))
183 return;
184 for (unsigned int i = 0; i < n->children.size(); i++) {
185 DeltaArchiveManifest_File_Child* child = f->add_children();
186 child->set_name(n->children[i]->name);
187 child->set_index(n->children[i]->node->idx);
188 }
189 for (unsigned int i = 0; i < n->children.size(); i++) {
Andrew de los Reyese5733992009-12-08 13:34:00 -0800190 NodeToDeltaArchiveManifest(n->children[i]->node.get(), archive, hard_links,
191 path + "/" + n->children[i]->name);
adlr@google.com3defe6a2009-12-04 20:57:17 +0000192 }
193}
194
195} // namespace {}
196
197// For each file in archive, write a delta for it into out_file
198// and update 'file' to refer to the delta.
199// This is a recursive function. Returns true on success.
200bool DeltaDiffGenerator::WriteFileDiffsToDeltaFile(
201 DeltaArchiveManifest* archive,
202 DeltaArchiveManifest_File* file,
203 const std::string& file_name,
204 const std::string& old_path,
205 const std::string& new_path,
206 FileWriter* out_file_writer,
Andrew de los Reyese5733992009-12-08 13:34:00 -0800207 int* out_file_length,
208 const std::string& force_compress_dev_path) {
adlr@google.com3defe6a2009-12-04 20:57:17 +0000209 TEST_AND_RETURN_FALSE(file->has_mode());
210
211 // Stat the actual file, too
212 struct stat stbuf;
213 TEST_AND_RETURN_FALSE_ERRNO(lstat((new_path + "/" + file_name).c_str(),
214 &stbuf) == 0);
215 TEST_AND_RETURN_FALSE(stbuf.st_mode == file->mode());
216
217 // See if we're a directory or not
218 if (S_ISDIR(file->mode())) {
219 for (int i = 0; i < file->children_size(); i++) {
220 DeltaArchiveManifest_File_Child* child = file->mutable_children(i);
221 DeltaArchiveManifest_File* child_file =
222 archive->mutable_files(child->index());
223 TEST_AND_RETURN_FALSE(WriteFileDiffsToDeltaFile(
224 archive,
225 child_file,
226 child->name(),
227 old_path + "/" + file_name,
228 new_path + "/" + file_name,
229 out_file_writer,
Andrew de los Reyese5733992009-12-08 13:34:00 -0800230 out_file_length,
231 force_compress_dev_path));
adlr@google.com3defe6a2009-12-04 20:57:17 +0000232 }
233 return true;
234 }
235
Andrew de los Reyese5733992009-12-08 13:34:00 -0800236 if (S_ISFIFO(file->mode()) || S_ISSOCK(file->mode()) ||
237 file->has_hardlink_path()) {
adlr@google.com3defe6a2009-12-04 20:57:17 +0000238 // These don't store any additional data
239 return true;
240 }
241
242 vector<char> data;
243 bool should_compress = true;
244 bool format_set = false;
245 DeltaArchiveManifest_File_DataFormat format;
246 if (S_ISLNK(file->mode())) {
adlr@google.com3defe6a2009-12-04 20:57:17 +0000247 TEST_AND_RETURN_FALSE(EncodeLink(new_path + "/" + file_name, &data));
248 } else if (S_ISCHR(file->mode()) || S_ISBLK(file->mode())) {
Andrew de los Reyese5733992009-12-08 13:34:00 -0800249 TEST_AND_RETURN_FALSE(EncodeDev(stbuf, &data, &format,
250 new_path + "/" + file_name ==
251 force_compress_dev_path));
252 format_set = true;
adlr@google.com3defe6a2009-12-04 20:57:17 +0000253 } else if (S_ISREG(file->mode())) {
adlr@google.com3defe6a2009-12-04 20:57:17 +0000254 // regular file. We may use a delta here.
255 TEST_AND_RETURN_FALSE(EncodeFile(old_path, new_path, file_name, &format,
256 &data));
257 should_compress = false;
258 format_set = true;
259 if ((format == DeltaArchiveManifest_File_DataFormat_BSDIFF) ||
260 (format == DeltaArchiveManifest_File_DataFormat_FULL_GZ))
261 TEST_AND_RETURN_FALSE(!data.empty());
262 } else {
263 // Should never get here; unhandled mode type.
264 LOG(ERROR) << "Unhandled mode type: " << file->mode();
265 return false;
266 }
Andrew de los Reyese5733992009-12-08 13:34:00 -0800267
adlr@google.com3defe6a2009-12-04 20:57:17 +0000268 if (!format_set) {
269 // Pick a format now
270 vector<char> compressed_data;
271 TEST_AND_RETURN_FALSE(GzipCompress(data, &compressed_data));
272 if (compressed_data.size() < data.size()) {
273 format = DeltaArchiveManifest_File_DataFormat_FULL_GZ;
274 data.swap(compressed_data);
275 } else {
276 format = DeltaArchiveManifest_File_DataFormat_FULL;
277 }
278 format_set = true;
279 }
280
281 if (!data.empty()) {
282 TEST_AND_RETURN_FALSE(format_set);
283 file->set_data_format(format);
284 file->set_data_offset(*out_file_length);
285 TEST_AND_RETURN_FALSE(static_cast<ssize_t>(data.size()) ==
286 out_file_writer->Write(&data[0], data.size()));
287 file->set_data_length(data.size());
288 *out_file_length += data.size();
289 }
290 return true;
291}
292
293bool DeltaDiffGenerator::EncodeLink(const std::string& path,
294 std::vector<char>* out) {
295 // Store symlink path as file data
296 vector<char> link_data(4096);
297 int rc = readlink(path.c_str(), &link_data[0], link_data.size());
298 TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
299 link_data.resize(rc);
300 out->swap(link_data);
301 return true;
302}
303
Andrew de los Reyese5733992009-12-08 13:34:00 -0800304bool DeltaDiffGenerator::EncodeDev(
305 const struct stat& stbuf,
306 vector<char>* out,
307 DeltaArchiveManifest_File_DataFormat* format,
308 bool force_compression) {
adlr@google.com3defe6a2009-12-04 20:57:17 +0000309 LinuxDevice dev;
310 dev.set_major(major(stbuf.st_rdev));
311 dev.set_minor(minor(stbuf.st_rdev));
312 out->resize(dev.ByteSize());
313 TEST_AND_RETURN_FALSE(dev.SerializeToArray(&(*out)[0], out->size()));
Andrew de los Reyese5733992009-12-08 13:34:00 -0800314 if (force_compression) {
315 vector<char> compressed;
316 TEST_AND_RETURN_FALSE(GzipCompress(*out, &compressed));
317 out->swap(compressed);
318 *format = DeltaArchiveManifest_File_DataFormat_FULL_GZ;
319 } else {
320 *format = DeltaArchiveManifest_File_DataFormat_FULL;
321 }
adlr@google.com3defe6a2009-12-04 20:57:17 +0000322 return true;
323}
324
325// Encode the file at new_path + "/" + file_name. It may be a binary diff
326// based on old_path + "/" + file_name. out_data_format will be set to
327// the format used. out_data_format may not be NULL.
328bool DeltaDiffGenerator::EncodeFile(
329 const string& old_dir,
330 const string& new_dir,
331 const string& file_name,
332 DeltaArchiveManifest_File_DataFormat* out_data_format,
333 vector<char>* out) {
334 TEST_AND_RETURN_FALSE(out_data_format);
335 // First, see the full length:
336 vector<char> full_data;
337 TEST_AND_RETURN_FALSE(utils::ReadFile(new_dir + "/" + file_name, &full_data));
338 vector<char> gz_data;
339 if (!full_data.empty()) {
340 TEST_AND_RETURN_FALSE(GzipCompress(full_data, &gz_data));
341 }
342 vector<char> *ret = NULL;
343
344 if (gz_data.size() < full_data.size()) {
345 *out_data_format = DeltaArchiveManifest_File_DataFormat_FULL_GZ;
346 ret = &gz_data;
347 } else {
348 *out_data_format = DeltaArchiveManifest_File_DataFormat_FULL;
349 ret = &full_data;
350 }
351
352 struct stat old_stbuf;
353 if ((stat((old_dir + "/" + file_name).c_str(), &old_stbuf) < 0) ||
354 (!S_ISREG(old_stbuf.st_mode))) {
355 // stat() failed or old file is not a regular file. Just send back the full
356 // contents
357 *out = *ret;
358 return true;
359 }
360 // We have an old file. Do a binary diff. For now use bsdiff.
361 const string kPatchFile = "/tmp/delta.patch";
362
363 vector<string> cmd;
364 cmd.push_back("/usr/bin/bsdiff");
365 cmd.push_back(old_dir + "/" + file_name);
366 cmd.push_back(new_dir + "/" + file_name);
367 cmd.push_back(kPatchFile);
368
369 int rc = 1;
370 TEST_AND_RETURN_FALSE(Subprocess::SynchronousExec(cmd, &rc));
371 TEST_AND_RETURN_FALSE(rc == 0);
372 vector<char> patch_file;
373 TEST_AND_RETURN_FALSE(utils::ReadFile(kPatchFile, &patch_file));
374 unlink(kPatchFile.c_str());
375
376 if (patch_file.size() < ret->size()) {
377 *out_data_format = DeltaArchiveManifest_File_DataFormat_BSDIFF;
378 ret = &patch_file;
379 }
380
381 *out = *ret;
382 return true;
383}
384
385DeltaArchiveManifest* DeltaDiffGenerator::EncodeMetadataToProtoBuffer(
386 const char* new_path) {
387 Node node;
388 if (!UpdateNodeFromPath(new_path, &node))
389 return NULL;
390 int index = 0;
391 PopulateChildIndexes(&node, &index);
392 DeltaArchiveManifest *ret = new DeltaArchiveManifest;
Andrew de los Reyese5733992009-12-08 13:34:00 -0800393 map<ino_t, string> hard_links; // inode -> first found path for inode
394 NodeToDeltaArchiveManifest(&node, ret, &hard_links, "");
adlr@google.com3defe6a2009-12-04 20:57:17 +0000395 return ret;
396}
397
Andrew de los Reyese5733992009-12-08 13:34:00 -0800398bool DeltaDiffGenerator::EncodeDataToDeltaFile(
399 DeltaArchiveManifest* archive,
400 const std::string& old_path,
401 const std::string& new_path,
402 const std::string& out_file,
403 const std::string& force_compress_dev_path) {
adlr@google.com3defe6a2009-12-04 20:57:17 +0000404 DirectFileWriter out_writer;
405 int r = out_writer.Open(out_file.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0666);
406 TEST_AND_RETURN_FALSE_ERRNO(r >= 0);
407 ScopedFileWriterCloser closer(&out_writer);
408 TEST_AND_RETURN_FALSE(out_writer.Write(DeltaDiffParser::kFileMagic,
409 strlen(DeltaDiffParser::kFileMagic))
410 == static_cast<ssize_t>(
411 strlen(DeltaDiffParser::kFileMagic)));
412 // Write 8 null bytes. This will be filled in w/ the offset of
413 // the protobuf.
414 TEST_AND_RETURN_FALSE(out_writer.Write("\0\0\0\0\0\0\0\0", 8) == 8);
415 // 8 more bytes will be filled w/ the protobuf length.
416 TEST_AND_RETURN_FALSE(out_writer.Write("\0\0\0\0\0\0\0\0", 8) == 8);
417 int out_file_length = strlen(DeltaDiffParser::kFileMagic) + 16;
418
419 TEST_AND_RETURN_FALSE(archive->files_size() > 0);
420 DeltaArchiveManifest_File* file = archive->mutable_files(0);
421
422 TEST_AND_RETURN_FALSE(WriteFileDiffsToDeltaFile(archive,
423 file,
424 "",
425 old_path,
426 new_path,
427 &out_writer,
Andrew de los Reyese5733992009-12-08 13:34:00 -0800428 &out_file_length,
429 force_compress_dev_path));
adlr@google.com3defe6a2009-12-04 20:57:17 +0000430
431 // Finally, write the protobuf to the end of the file
432 string encoded_archive;
433 TEST_AND_RETURN_FALSE(archive->SerializeToString(&encoded_archive));
434
435 // Compress the protobuf (which contains filenames)
436 vector<char> compressed_encoded_archive;
437 TEST_AND_RETURN_FALSE(GzipCompressString(encoded_archive,
438 &compressed_encoded_archive));
439
440 TEST_AND_RETURN_FALSE(out_writer.Write(compressed_encoded_archive.data(),
441 compressed_encoded_archive.size()) ==
442 static_cast<ssize_t>(
443 compressed_encoded_archive.size()));
444
445 // write offset of protobut to just after the file magic
446 int64 big_endian_protobuf_offset = htobe64(out_file_length);
447 TEST_AND_RETURN_FALSE(pwrite(out_writer.fd(),
448 &big_endian_protobuf_offset,
449 sizeof(big_endian_protobuf_offset),
450 strlen(DeltaDiffParser::kFileMagic)) ==
451 sizeof(big_endian_protobuf_offset));
452 // Write the size just after the offset
453 int64 pb_length = htobe64(compressed_encoded_archive.size());
454 TEST_AND_RETURN_FALSE(pwrite(out_writer.fd(),
455 &pb_length,
456 sizeof(pb_length),
457 strlen(DeltaDiffParser::kFileMagic) +
458 sizeof(big_endian_protobuf_offset)) ==
459 sizeof(pb_length));
460 return true;
461}
462
463} // namespace chromeos_update_engine