// Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROMEOS_PLATFORM_UPDATE_ENGINE_DELTA_DIFF_GENERATOR_H__
#define CHROMEOS_PLATFORM_UPDATE_ENGINE_DELTA_DIFF_GENERATOR_H__

#include <sys/types.h>

#include <set>
#include <string>
#include <vector>

#include "base/basictypes.h"
#include "update_engine/graph_types.h"
#include "update_engine/update_metadata.pb.h"

// There is one function in DeltaDiffGenerator of importance to users
// of the class: GenerateDeltaUpdateFile(). Before calling it,
// the old and new images must be mounted. Call GenerateDeltaUpdateFile()
// with both the mount-points of the images in addition to the paths of
// the images (both old and new). A delta from old to new will be
// generated and stored in output_path.

namespace chromeos_update_engine {

Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 23 | // This struct stores all relevant info for an edge that is cut between |
| 24 | // nodes old_src -> old_dst by creating new vertex new_vertex. The new |
| 25 | // relationship is: |
| 26 | // old_src -(read before)-> new_vertex <-(write before)- old_dst |
| 27 | // new_vertex is a MOVE operation that moves some existing blocks into |
| 28 | // temp space. The temp extents are, by necessity, stored in new_vertex |
| 29 | // (as dst extents) and old_dst (as src extents), but they are also broken |
| 30 | // out into tmp_extents, as the nodes themselves may contain many more |
| 31 | // extents. |
| 32 | struct CutEdgeVertexes { |
| 33 | Vertex::Index new_vertex; |
| 34 | Vertex::Index old_src; |
| 35 | Vertex::Index old_dst; |
| 36 | std::vector<Extent> tmp_extents; |
| 37 | }; |
| 38 | |
adlr@google.com | 3defe6a | 2009-12-04 20:57:17 +0000 | [diff] [blame] | 39 | class DeltaDiffGenerator { |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 40 | public: |
| 41 | // Represents a disk block on the install partition. |
| 42 | struct Block { |
| 43 | // During install, each block on the install partition will be written |
| 44 | // and some may be read (in all likelihood, many will be read). |
| 45 | // The reading and writing will be performed by InstallOperations, |
| 46 | // each of which has a corresponding vertex in a graph. |
| 47 | // A Block object tells which vertex will read or write this block |
| 48 | // at install time. |
| 49 | // Generally, there will be a vector of Block objects whose length |
| 50 | // is the number of blocks on the install partition. |
| 51 | Block() : reader(Vertex::kInvalidIndex), writer(Vertex::kInvalidIndex) {} |
| 52 | Vertex::Index reader; |
| 53 | Vertex::Index writer; |
| 54 | }; |
| 55 | |
| 56 | // This is the only function that external users of the class should call. |
| 57 | // old_image and new_image are paths to two image files. They should be |
| 58 | // mounted read-only at paths old_root and new_root respectively. |
Andrew de los Reyes | f4c7ef1 | 2010-04-30 10:37:00 -0700 | [diff] [blame] | 59 | // {old,new}_kernel_part are paths to the old and new kernel partition |
| 60 | // images, respectively. |
Andrew de los Reyes | 932bc4c | 2010-08-23 18:14:09 -0700 | [diff] [blame] | 61 | // private_key_path points to a private key used to sign the update. |
| 62 | // Pass empty string to not sign the update. |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 63 | // output_path is the filename where the delta update should be written. |
Darin Petkov | 8e447e0 | 2013-04-16 16:23:50 +0200 | [diff] [blame] | 64 | // If |chunk_size| is not -1, the delta payload is generated based on |
| 65 | // |chunk_size| chunks rather than whole files. |
Chris Sosa | d5ae156 | 2013-04-23 13:20:18 -0700 | [diff] [blame] | 66 | // This method computes scratch space based on |rootfs_partition_size|. |
Jay Srinivasan | 738fdf3 | 2012-12-07 17:40:54 -0800 | [diff] [blame] | 67 | // Returns true on success. Also writes the size of the metadata into |
| 68 | // |metadata_size|. |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 69 | static bool GenerateDeltaUpdateFile(const std::string& old_root, |
| 70 | const std::string& old_image, |
| 71 | const std::string& new_root, |
| 72 | const std::string& new_image, |
Andrew de los Reyes | f4c7ef1 | 2010-04-30 10:37:00 -0700 | [diff] [blame] | 73 | const std::string& old_kernel_part, |
| 74 | const std::string& new_kernel_part, |
Andrew de los Reyes | 932bc4c | 2010-08-23 18:14:09 -0700 | [diff] [blame] | 75 | const std::string& output_path, |
Jay Srinivasan | 738fdf3 | 2012-12-07 17:40:54 -0800 | [diff] [blame] | 76 | const std::string& private_key_path, |
Darin Petkov | 8e447e0 | 2013-04-16 16:23:50 +0200 | [diff] [blame] | 77 | off_t chunk_size, |
Chris Sosa | d5ae156 | 2013-04-23 13:20:18 -0700 | [diff] [blame] | 78 | size_t rootfs_partition_size, |
Don Garrett | 0dd3985 | 2013-04-03 16:55:42 -0700 | [diff] [blame] | 79 | const ImageInfo* old_image_info, |
| 80 | const ImageInfo* new_image_info, |
Jay Srinivasan | 738fdf3 | 2012-12-07 17:40:54 -0800 | [diff] [blame] | 81 | uint64_t* metadata_size); |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 82 | |
| 83 | // These functions are public so that the unit tests can access them: |
| 84 | |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 85 | // Takes a graph, which is not a DAG, which represents the files just |
| 86 | // read from disk, and converts it into a DAG by breaking all cycles |
| 87 | // and finding temp space to resolve broken edges. |
| 88 | // The final order of the nodes is given in |final_order| |
| 89 | // Some files may need to be reread from disk, thus |fd| and |
| 90 | // |data_file_size| are be passed. |
Andrew de los Reyes | 927179d | 2010-12-02 11:26:48 -0800 | [diff] [blame] | 91 | // If |scratch_vertex| is not kInvalidIndex, removes it from |
| 92 | // |final_order| before returning. |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 93 | // Returns true on success. |
| 94 | static bool ConvertGraphToDag(Graph* graph, |
| 95 | const std::string& new_root, |
| 96 | int fd, |
| 97 | off_t* data_file_size, |
Andrew de los Reyes | 927179d | 2010-12-02 11:26:48 -0800 | [diff] [blame] | 98 | std::vector<Vertex::Index>* final_order, |
| 99 | Vertex::Index scratch_vertex); |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 100 | |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 101 | // Reads old_filename (if it exists) and a new_filename and determines |
| 102 | // the smallest way to encode this file for the diff. It stores |
| 103 | // necessary data in out_data and fills in out_op. |
| 104 | // If there's no change in old and new files, it creates a MOVE |
| 105 | // operation. If there is a change, or the old file doesn't exist, |
| 106 | // the smallest of REPLACE, REPLACE_BZ, or BSDIFF wins. |
| 107 | // new_filename must contain at least one byte. |
Darin Petkov | 8e447e0 | 2013-04-16 16:23:50 +0200 | [diff] [blame] | 108 | // |new_filename| is read starting at |chunk_offset|. |
| 109 | // If |chunk_size| is not -1, only up to |chunk_size| bytes are diffed. |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 110 | // Returns true on success. |
| 111 | static bool ReadFileToDiff(const std::string& old_filename, |
| 112 | const std::string& new_filename, |
Darin Petkov | 8e447e0 | 2013-04-16 16:23:50 +0200 | [diff] [blame] | 113 | off_t chunk_offset, |
| 114 | off_t chunk_size, |
Don Garrett | 36e6077 | 2012-03-29 10:31:20 -0700 | [diff] [blame] | 115 | bool bsdiff_allowed, |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 116 | std::vector<char>* out_data, |
Darin Petkov | 68c10d1 | 2010-10-14 09:24:37 -0700 | [diff] [blame] | 117 | DeltaArchiveManifest_InstallOperation* out_op, |
| 118 | bool gather_extents); |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 119 | |
Andrew de los Reyes | 927179d | 2010-12-02 11:26:48 -0800 | [diff] [blame] | 120 | // Creates a dummy REPLACE_BZ node in the given |vertex|. This can be used |
| 121 | // to provide scratch space. The node writes |num_blocks| blocks starting at |
| 122 | // |start_block|The node should be marked invalid before writing all nodes to |
| 123 | // the output file. |
| 124 | static void CreateScratchNode(uint64_t start_block, |
| 125 | uint64_t num_blocks, |
| 126 | Vertex* vertex); |
| 127 | |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 128 | // Modifies blocks read by 'op' so that any blocks referred to by |
| 129 | // 'remove_extents' are replaced with blocks from 'replace_extents'. |
| 130 | // 'remove_extents' and 'replace_extents' must be the same number of blocks. |
| 131 | // Blocks will be substituted in the order listed in the vectors. |
| 132 | // E.g. if 'op' reads blocks 1, 2, 3, 4, 5, 6, 7, 8, remove_extents |
| 133 | // contains blocks 6, 2, 3, 5, and replace blocks contains |
| 134 | // 12, 13, 14, 15, then op will be changed to read from: |
| 135 | // 1, 13, 14, 4, 15, 12, 7, 8 |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 136 | static void SubstituteBlocks(Vertex* vertex, |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 137 | const std::vector<Extent>& remove_extents, |
| 138 | const std::vector<Extent>& replace_extents); |
| 139 | |
| 140 | // Cuts 'edges' from 'graph' according to the AU algorithm. This means |
| 141 | // for each edge A->B, remove the dependency that B occur before A. |
| 142 | // Do this by creating a new operation X that copies from the blocks |
| 143 | // specified by the edge's properties to temp space T. Modify B to read |
| 144 | // from T rather than the blocks in the edge. Modify A to depend on X, |
| 145 | // but not on B. Free space is found by looking in 'blocks'. |
| 146 | // Returns true on success. |
| 147 | static bool CutEdges(Graph* graph, |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 148 | const std::set<Edge>& edges, |
| 149 | std::vector<CutEdgeVertexes>* out_cuts); |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 150 | |
| 151 | // Stores all Extents in 'extents' into 'out'. |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 152 | static void StoreExtents(const std::vector<Extent>& extents, |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 153 | google::protobuf::RepeatedPtrField<Extent>* out); |
Darin Petkov | 7ea3233 | 2010-10-13 10:46:11 -0700 | [diff] [blame] | 154 | |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 155 | // Creates all the edges for the graph. Writers of a block point to |
| 156 | // readers of the same block. This is because for an edge A->B, B |
| 157 | // must complete before A executes. |
| 158 | static void CreateEdges(Graph* graph, const std::vector<Block>& blocks); |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 159 | |
| 160 | // Given a topologically sorted graph |op_indexes| and |graph|, alters |
| 161 | // |op_indexes| to move all the full operations to the end of the vector. |
| 162 | // Full operations should not be depended on, so this is safe. |
| 163 | static void MoveFullOpsToBack(Graph* graph, |
| 164 | std::vector<Vertex::Index>* op_indexes); |
| 165 | |
| 166 | // Sorts the vector |cuts| by its |cuts[].old_dest| member. Order is |
| 167 | // determined by the order of elements in op_indexes. |
| 168 | static void SortCutsByTopoOrder(std::vector<Vertex::Index>& op_indexes, |
| 169 | std::vector<CutEdgeVertexes>* cuts); |
| 170 | |
| 171 | // Returns true iff there are no extents in the graph that refer to temp |
| 172 | // blocks. Temp blocks are in the range [kTempBlockStart, kSparseHole). |
| 173 | static bool NoTempBlocksRemain(const Graph& graph); |
Darin Petkov | 7ea3233 | 2010-10-13 10:46:11 -0700 | [diff] [blame] | 174 | |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 175 | // Install operations in the manifest may reference data blobs, which |
| 176 | // are in data_blobs_path. This function creates a new data blobs file |
| 177 | // with the data blobs in the same order as the referencing install |
| 178 | // operations in the manifest. E.g. if manifest[0] has a data blob |
| 179 | // "X" at offset 1, manifest[1] has a data blob "Y" at offset 0, |
| 180 | // and data_blobs_path's file contains "YX", new_data_blobs_path |
| 181 | // will set to be a file that contains "XY". |
| 182 | static bool ReorderDataBlobs(DeltaArchiveManifest* manifest, |
| 183 | const std::string& data_blobs_path, |
| 184 | const std::string& new_data_blobs_path); |
Darin Petkov | 7ea3233 | 2010-10-13 10:46:11 -0700 | [diff] [blame] | 185 | |
Jay Srinivasan | 00f76b6 | 2012-09-17 18:48:36 -0700 | [diff] [blame] | 186 | // Computes a SHA256 hash of the given buf and sets the hash value in the |
| 187 | // operation so that update_engine could verify. This hash should be set |
| 188 | // for all operations that have a non-zero data blob. One exception is the |
| 189 | // dummy operation for signature blob because the contents of the signature |
| 190 | // blob will not be available at payload creation time. So, update_engine will |
| 191 | // gracefully ignore the dummy signature operation. |
| 192 | static bool AddOperationHash(DeltaArchiveManifest_InstallOperation* op, |
| 193 | const std::vector<char>& buf); |
| 194 | |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 195 | // Handles allocation of temp blocks to a cut edge by converting the |
| 196 | // dest node to a full op. This removes the need for temp blocks, but |
| 197 | // comes at the cost of a worse compression ratio. |
| 198 | // For example, say we have A->B->A. It would first be cut to form: |
| 199 | // A->B->N<-A, where N copies blocks to temp space. If there are no |
| 200 | // temp blocks, this function can be called to convert it to the form: |
| 201 | // A->B. Now, A is a full operation. |
| 202 | static bool ConvertCutToFullOp(Graph* graph, |
| 203 | const CutEdgeVertexes& cut, |
| 204 | const std::string& new_root, |
| 205 | int data_fd, |
| 206 | off_t* data_file_size); |
Darin Petkov | 7ea3233 | 2010-10-13 10:46:11 -0700 | [diff] [blame] | 207 | |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 208 | // Takes |op_indexes|, which is effectively a mapping from order in |
| 209 | // which the op is performed -> graph vertex index, and produces the |
| 210 | // reverse: a mapping from graph vertex index -> op_indexes index. |
| 211 | static void GenerateReverseTopoOrderMap( |
| 212 | std::vector<Vertex::Index>& op_indexes, |
| 213 | std::vector<std::vector<Vertex::Index>::size_type>* reverse_op_indexes); |
Darin Petkov | 7ea3233 | 2010-10-13 10:46:11 -0700 | [diff] [blame] | 214 | |
Andrew de los Reyes | ef01755 | 2010-10-06 17:57:52 -0700 | [diff] [blame] | 215 | // Takes a |graph|, which has edges that must be cut, as listed in |
| 216 | // |cuts|. Cuts the edges. Maintains a list in which the operations |
| 217 | // will be performed (in |op_indexes|) and the reverse (in |
| 218 | // |reverse_op_indexes|). Cutting edges requires scratch space, and |
| 219 | // if insufficient scratch is found, the file is reread and will be |
| 220 | // send down (either as REPLACE or REPLACE_BZ). Returns true on |
| 221 | // success. |
| 222 | static bool AssignTempBlocks( |
| 223 | Graph* graph, |
| 224 | const std::string& new_root, |
| 225 | int data_fd, |
| 226 | off_t* data_file_size, |
| 227 | std::vector<Vertex::Index>* op_indexes, |
| 228 | std::vector<std::vector<Vertex::Index>::size_type>* reverse_op_indexes, |
Andrew de los Reyes | 4ba850d | 2010-10-25 12:12:40 -0700 | [diff] [blame] | 229 | const std::vector<CutEdgeVertexes>& cuts); |
Andrew de los Reyes | b10320d | 2010-03-31 16:44:44 -0700 | [diff] [blame] | 230 | |
Darin Petkov | 9fa7ec5 | 2010-10-18 11:45:23 -0700 | [diff] [blame] | 231 | // Returns true if |op| is a no-op operation that doesn't do any useful work |
| 232 | // (e.g., a move operation that copies blocks onto themselves). |
| 233 | static bool IsNoopOperation(const DeltaArchiveManifest_InstallOperation& op); |
| 234 | |
Andrew de los Reyes | 89f17be | 2010-10-22 13:39:09 -0700 | [diff] [blame] | 235 | static bool InitializePartitionInfo(bool is_kernel, |
| 236 | const std::string& partition, |
| 237 | PartitionInfo* info); |
| 238 | |
Thieu Le | 5c7d975 | 2010-12-15 16:09:28 -0800 | [diff] [blame] | 239 | // Runs the bsdiff tool on two files and returns the resulting delta in |
| 240 | // |out|. Returns true on success. |
| 241 | static bool BsdiffFiles(const std::string& old_file, |
| 242 | const std::string& new_file, |
| 243 | std::vector<char>* out); |
| 244 | |
| 245 | // The |blocks| vector contains a reader and writer for each block on the |
| 246 | // filesystem that's being in-place updated. We populate the reader/writer |
| 247 | // fields of |blocks| by calling this function. |
| 248 | // For each block in |operation| that is read or written, find that block |
| 249 | // in |blocks| and set the reader/writer field to the vertex passed. |
| 250 | // |graph| is not strictly necessary, but useful for printing out |
| 251 | // error messages. |
| 252 | static bool AddInstallOpToBlocksVector( |
| 253 | const DeltaArchiveManifest_InstallOperation& operation, |
| 254 | const Graph& graph, |
| 255 | Vertex::Index vertex, |
| 256 | std::vector<DeltaDiffGenerator::Block>* blocks); |
| 257 | |
Darin Petkov | 9574f7e | 2011-01-13 10:48:12 -0800 | [diff] [blame] | 258 | // Adds to |manifest| a dummy operation that points to a signature blob |
| 259 | // located at the specified offset/length. |
| 260 | static void AddSignatureOp(uint64_t signature_blob_offset, |
| 261 | uint64_t signature_blob_length, |
| 262 | DeltaArchiveManifest* manifest); |
| 263 | |
adlr@google.com | 3defe6a | 2009-12-04 20:57:17 +0000 | [diff] [blame] | 264 | private: |
Jay Srinivasan | 738fdf3 | 2012-12-07 17:40:54 -0800 | [diff] [blame] | 265 | // This should never be constructed |
adlr@google.com | 3defe6a | 2009-12-04 20:57:17 +0000 | [diff] [blame] | 266 | DISALLOW_IMPLICIT_CONSTRUCTORS(DeltaDiffGenerator); |
| 267 | }; |
| 268 | |
// Constants declared here and defined in another translation unit.
// NOTE(review): judging by the names, these are the paths of the bsdiff and
// bspatch tools, the magic string that starts a delta payload, and the
// default rootfs partition size -- confirm against the definitions.
extern const char* const kBsdiffPath;
extern const char* const kBspatchPath;
extern const char* const kDeltaMagic;
extern const size_t kRootFSPartitionSize;

};  // namespace chromeos_update_engine

#endif  // CHROMEOS_PLATFORM_UPDATE_ENGINE_DELTA_DIFF_GENERATOR_H__