| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 1 | // | 
|  | 2 | // Copyright (C) 2021 The Android Open Source Project | 
|  | 3 | // | 
|  | 4 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | 5 | // you may not use this file except in compliance with the License. | 
|  | 6 | // You may obtain a copy of the License at | 
|  | 7 | // | 
|  | 8 | //      http://www.apache.org/licenses/LICENSE-2.0 | 
|  | 9 | // | 
|  | 10 | // Unless required by applicable law or agreed to in writing, software | 
|  | 11 | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | 13 | // See the License for the specific language governing permissions and | 
|  | 14 | // limitations under the License. | 
|  | 15 | // | 
|  | 16 |  | 
|  | 17 | #include "lz4diff_compress.h" | 
|  | 18 |  | 
|  | 19 | #include "update_engine/common/utils.h" | 
|  | 20 | #include "update_engine/common/hash_calculator.h" | 
|  | 21 | #include "update_engine/payload_generator/delta_diff_generator.h" | 
|  | 22 | #include "update_engine/payload_generator/payload_generation_config.h" | 
|  | 23 |  | 
|  | 24 | #include <base/logging.h> | 
|  | 25 | #include <lz4.h> | 
|  | 26 | #include <lz4hc.h> | 
|  | 27 |  | 
|  | 28 | namespace chromeos_update_engine { | 
|  | 29 |  | 
| Kelvin Zhang | 760c334 | 2022-02-08 13:41:49 -0800 | [diff] [blame] | 30 | bool TryCompressBlob(std::string_view blob, | 
|  | 31 | const std::vector<CompressedBlock>& block_info, | 
|  | 32 | const bool zero_padding_enabled, | 
|  | 33 | const CompressionAlgorithm compression_algo, | 
|  | 34 | const SinkFunc& sink) { | 
|  | 35 | size_t uncompressed_size = 0; | 
|  | 36 | for (const auto& block : block_info) { | 
|  | 37 | CHECK_EQ(uncompressed_size, block.uncompressed_offset) | 
|  | 38 | << "Compressed block info is expected to be sorted."; | 
|  | 39 | uncompressed_size += block.uncompressed_length; | 
|  | 40 | } | 
|  | 41 | auto hc = LZ4_createStreamHC(); | 
|  | 42 | DEFER { | 
|  | 43 | if (hc) { | 
|  | 44 | LZ4_freeStreamHC(hc); | 
|  | 45 | hc = nullptr; | 
|  | 46 | } | 
|  | 47 | }; | 
|  | 48 | size_t compressed_offset = 0; | 
|  | 49 | Blob block_buffer; | 
|  | 50 | for (const auto& block : block_info) { | 
|  | 51 | const auto uncompressed_block = | 
|  | 52 | blob.substr(block.uncompressed_offset, block.uncompressed_length); | 
|  | 53 | if (!block.IsCompressed()) { | 
|  | 54 | TEST_EQ(sink(reinterpret_cast<const uint8_t*>(uncompressed_block.data()), | 
|  | 55 | uncompressed_block.size()), | 
|  | 56 | uncompressed_block.size()); | 
|  | 57 | continue; | 
|  | 58 | } | 
|  | 59 | block_buffer.resize(block.compressed_length); | 
|  | 60 | // Execute the increment at end of each loop | 
|  | 61 | DEFER { | 
|  | 62 | compressed_offset += block.compressed_length; | 
|  | 63 | block_buffer.clear(); | 
|  | 64 | }; | 
|  | 65 |  | 
|  | 66 | int ret = 0; | 
|  | 67 | // LZ4 spec enforces that last op of a compressed block must be an insert op | 
|  | 68 | // of at least 5 bytes. Compressors will try to conform to that requirement | 
|  | 69 | // if the input size is just right. We don't want that. So always give a | 
|  | 70 | // little bit more data. | 
|  | 71 | switch (int src_size = uncompressed_size - block.uncompressed_offset; | 
|  | 72 | compression_algo.type()) { | 
|  | 73 | case CompressionAlgorithm::LZ4HC: | 
|  | 74 | ret = LZ4_compress_HC_destSize( | 
|  | 75 | hc, | 
|  | 76 | uncompressed_block.data(), | 
|  | 77 | reinterpret_cast<char*>(block_buffer.data()), | 
|  | 78 | &src_size, | 
|  | 79 | block.compressed_length, | 
|  | 80 | compression_algo.level()); | 
|  | 81 | break; | 
|  | 82 | case CompressionAlgorithm::LZ4: | 
|  | 83 | ret = | 
|  | 84 | LZ4_compress_destSize(uncompressed_block.data(), | 
|  | 85 | reinterpret_cast<char*>(block_buffer.data()), | 
|  | 86 | &src_size, | 
|  | 87 | block.compressed_length); | 
|  | 88 | break; | 
|  | 89 | default: | 
|  | 90 | LOG(ERROR) << "Unrecognized compression algorithm: " | 
|  | 91 | << compression_algo.type(); | 
|  | 92 | return {}; | 
|  | 93 | } | 
|  | 94 | TEST_GT(ret, 0); | 
|  | 95 | const uint64_t bytes_written = ret; | 
|  | 96 | // Last block may have trailing zeros | 
|  | 97 | TEST_LE(bytes_written, block.compressed_length); | 
|  | 98 | if (bytes_written < block.compressed_length) { | 
|  | 99 | if (zero_padding_enabled) { | 
|  | 100 | const auto padding = block.compressed_length - bytes_written; | 
|  | 101 | std::memmove( | 
|  | 102 | block_buffer.data() + padding, block_buffer.data(), bytes_written); | 
|  | 103 | std::fill(block_buffer.data(), block_buffer.data() + padding, 0); | 
|  | 104 |  | 
|  | 105 | } else { | 
|  | 106 | std::fill(block_buffer.data() + bytes_written, | 
|  | 107 | block_buffer.data() + block.compressed_length, | 
|  | 108 | 0); | 
|  | 109 | } | 
|  | 110 | } | 
|  | 111 | TEST_EQ(sink(block_buffer.data(), block_buffer.size()), | 
|  | 112 | block_buffer.size()); | 
|  | 113 | } | 
|  | 114 | // Any trailing data will be copied to the output buffer. | 
|  | 115 | TEST_EQ( | 
|  | 116 | sink(reinterpret_cast<const uint8_t*>(blob.data()) + uncompressed_size, | 
|  | 117 | blob.size() - uncompressed_size), | 
|  | 118 | blob.size() - uncompressed_size); | 
|  | 119 | return true; | 
|  | 120 | } | 
|  | 121 |  | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 122 | Blob TryCompressBlob(std::string_view blob, | 
|  | 123 | const std::vector<CompressedBlock>& block_info, | 
|  | 124 | const bool zero_padding_enabled, | 
|  | 125 | const CompressionAlgorithm compression_algo) { | 
|  | 126 | size_t uncompressed_size = 0; | 
|  | 127 | size_t compressed_size = 0; | 
|  | 128 | for (const auto& block : block_info) { | 
|  | 129 | CHECK_EQ(uncompressed_size, block.uncompressed_offset) | 
|  | 130 | << "Compressed block info is expected to be sorted."; | 
|  | 131 | uncompressed_size += block.uncompressed_length; | 
|  | 132 | compressed_size += block.compressed_length; | 
|  | 133 | } | 
| Kelvin Zhang | 760c334 | 2022-02-08 13:41:49 -0800 | [diff] [blame] | 134 | TEST_EQ(uncompressed_size, blob.size()); | 
|  | 135 | Blob output; | 
|  | 136 | output.reserve(utils::RoundUp(compressed_size, kBlockSize)); | 
|  | 137 | if (!TryCompressBlob(blob, | 
|  | 138 | block_info, | 
|  | 139 | zero_padding_enabled, | 
|  | 140 | compression_algo, | 
|  | 141 | [&output](const uint8_t* data, size_t size) { | 
|  | 142 | output.insert(output.end(), data, data + size); | 
|  | 143 | return size; | 
|  | 144 | })) { | 
|  | 145 | return {}; | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 146 | } | 
| Kelvin Zhang | 760c334 | 2022-02-08 13:41:49 -0800 | [diff] [blame] | 147 |  | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 148 | return output; | 
|  | 149 | } | 
|  | 150 |  | 
|  | 151 | Blob TryDecompressBlob(std::string_view blob, | 
|  | 152 | const std::vector<CompressedBlock>& block_info, | 
|  | 153 | const bool zero_padding_enabled) { | 
|  | 154 | if (block_info.empty()) { | 
|  | 155 | return {}; | 
|  | 156 | } | 
|  | 157 | size_t uncompressed_size = 0; | 
|  | 158 | size_t compressed_size = 0; | 
|  | 159 | for (const auto& block : block_info) { | 
|  | 160 | CHECK_EQ(uncompressed_size, block.uncompressed_offset) | 
|  | 161 | << " Compressed block info is expected to be sorted, expected offset " | 
|  | 162 | << uncompressed_size << ", actual block " << block; | 
|  | 163 | uncompressed_size += block.uncompressed_length; | 
|  | 164 | compressed_size += block.compressed_length; | 
|  | 165 | } | 
|  | 166 | if (blob.size() < compressed_size) { | 
| Kelvin Zhang | 8389dfe | 2022-01-13 12:47:11 -0800 | [diff] [blame] | 167 | LOG(INFO) << "File is chunked. Skip lz4 decompress. Expected size: " | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 168 | << compressed_size << ", actual size: " << blob.size(); | 
|  | 169 | return {}; | 
|  | 170 | } | 
|  | 171 | Blob output; | 
|  | 172 | output.reserve(uncompressed_size); | 
|  | 173 | size_t compressed_offset = 0; | 
|  | 174 | for (const auto& block : block_info) { | 
|  | 175 | std::string_view cluster = | 
|  | 176 | blob.substr(compressed_offset, block.compressed_length); | 
|  | 177 | if (!block.IsCompressed()) { | 
|  | 178 | CHECK_NE(cluster.size(), 0UL); | 
|  | 179 | output.insert(output.end(), cluster.begin(), cluster.end()); | 
|  | 180 | compressed_offset += cluster.size(); | 
|  | 181 | continue; | 
|  | 182 | } | 
|  | 183 | size_t inputmargin = 0; | 
|  | 184 | if (zero_padding_enabled) { | 
| Greg Kaiser | 4203702 | 2022-01-04 08:48:18 -0800 | [diff] [blame] | 185 | while (inputmargin < std::min(kBlockSize, cluster.size()) && | 
|  | 186 | cluster[inputmargin] == 0) { | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 187 | inputmargin++; | 
|  | 188 | } | 
|  | 189 | } | 
|  | 190 | output.resize(output.size() + block.uncompressed_length); | 
|  | 191 |  | 
|  | 192 | const auto bytes_decompressed = LZ4_decompress_safe_partial( | 
|  | 193 | cluster.data() + inputmargin, | 
|  | 194 | reinterpret_cast<char*>(output.data()) + output.size() - | 
|  | 195 | block.uncompressed_length, | 
|  | 196 | cluster.size() - inputmargin, | 
|  | 197 | block.uncompressed_length, | 
|  | 198 | block.uncompressed_length); | 
|  | 199 | if (bytes_decompressed < 0) { | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 200 | LOG(FATAL) << "Failed to decompress, " << bytes_decompressed | 
|  | 201 | << ", output_cursor = " | 
|  | 202 | << output.size() - block.uncompressed_length | 
|  | 203 | << ", input_cursor = " << compressed_offset | 
|  | 204 | << ", blob.size() = " << blob.size() | 
|  | 205 | << ", cluster_size = " << block.compressed_length | 
|  | 206 | << ", dest capacity = " << block.uncompressed_length | 
|  | 207 | << ", input margin = " << inputmargin << " " | 
| Kelvin Zhang | 760c334 | 2022-02-08 13:41:49 -0800 | [diff] [blame] | 208 | << HashCalculator::SHA256Digest(cluster) << " " | 
|  | 209 | << HashCalculator::SHA256Digest(blob); | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 210 | return {}; | 
|  | 211 | } | 
|  | 212 | compressed_offset += block.compressed_length; | 
|  | 213 | CHECK_EQ(static_cast<uint64_t>(bytes_decompressed), | 
|  | 214 | block.uncompressed_length); | 
|  | 215 | } | 
|  | 216 | CHECK_EQ(output.size(), uncompressed_size); | 
|  | 217 |  | 
|  | 218 | // Trailing data not recorded by compressed block info will be treated as | 
|  | 219 | // uncompressed, most of the time these are xattrs or trailing zeros. | 
|  | 220 | CHECK_EQ(blob.size(), compressed_offset) | 
|  | 221 | << " Unexpected data the end of compressed data "; | 
|  | 222 | if (compressed_offset < blob.size()) { | 
|  | 223 | output.insert(output.end(), blob.begin() + compressed_offset, blob.end()); | 
|  | 224 | } | 
|  | 225 |  | 
|  | 226 | return output; | 
|  | 227 | } | 
|  | 228 |  | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 229 | Blob TryDecompressBlob(const Blob& blob, | 
|  | 230 | const std::vector<CompressedBlock>& block_info, | 
|  | 231 | const bool zero_padding_enabled) { | 
|  | 232 | return TryDecompressBlob( | 
|  | 233 | ToStringView(blob), block_info, zero_padding_enabled); | 
|  | 234 | } | 
|  | 235 |  | 
|  | 236 | std::ostream& operator<<(std::ostream& out, const CompressedBlock& block) { | 
|  | 237 | out << "CompressedBlock{.uncompressed_offset = " << block.uncompressed_offset | 
|  | 238 | << ", .compressed_length = " << block.compressed_length | 
|  | 239 | << ", .uncompressed_length = " << block.uncompressed_length << "}"; | 
|  | 240 | return out; | 
|  | 241 | } | 
|  | 242 |  | 
| Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 243 | std::ostream& operator<<(std::ostream& out, const CompressedBlockInfo& info) { | 
|  | 244 | out << "BlockInfo { compressed_length: " << info.compressed_length() | 
|  | 245 | << ", uncompressed_length: " << info.uncompressed_length() | 
|  | 246 | << ", uncompressed_offset: " << info.uncompressed_offset(); | 
|  | 247 | if (!info.sha256_hash().empty()) { | 
|  | 248 | out << ", sha256_hash: " << HexEncode(info.sha256_hash()); | 
|  | 249 | } | 
|  | 250 | if (!info.postfix_bspatch().empty()) { | 
|  | 251 | out << ", postfix_bspatch: " << info.postfix_bspatch().size(); | 
|  | 252 | } | 
|  | 253 | out << "}"; | 
|  | 254 | return out; | 
|  | 255 | } | 
|  | 256 |  | 
|  | 257 | }  // namespace chromeos_update_engine |