Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 1 | // |
| 2 | // Copyright (C) 2021 The Android Open Source Project |
| 3 | // |
| 4 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | // you may not use this file except in compliance with the License. |
| 6 | // You may obtain a copy of the License at |
| 7 | // |
| 8 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | // |
| 10 | // Unless required by applicable law or agreed to in writing, software |
| 11 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | // See the License for the specific language governing permissions and |
| 14 | // limitations under the License. |
| 15 | // |
| 16 | |
| 17 | #include "lz4diff_compress.h" |
| 18 | |
| 19 | #include "update_engine/common/utils.h" |
| 20 | #include "update_engine/common/hash_calculator.h" |
| 21 | #include "update_engine/payload_generator/delta_diff_generator.h" |
| 22 | #include "update_engine/payload_generator/payload_generation_config.h" |
| 23 | |
| 24 | #include <base/logging.h> |
| 25 | #include <lz4.h> |
| 26 | #include <lz4hc.h> |
| 27 | |
| 28 | namespace chromeos_update_engine { |
| 29 | |
Kelvin Zhang | 760c334 | 2022-02-08 13:41:49 -0800 | [diff] [blame] | 30 | bool TryCompressBlob(std::string_view blob, |
| 31 | const std::vector<CompressedBlock>& block_info, |
| 32 | const bool zero_padding_enabled, |
| 33 | const CompressionAlgorithm compression_algo, |
| 34 | const SinkFunc& sink) { |
| 35 | size_t uncompressed_size = 0; |
| 36 | for (const auto& block : block_info) { |
| 37 | CHECK_EQ(uncompressed_size, block.uncompressed_offset) |
| 38 | << "Compressed block info is expected to be sorted."; |
| 39 | uncompressed_size += block.uncompressed_length; |
| 40 | } |
| 41 | auto hc = LZ4_createStreamHC(); |
| 42 | DEFER { |
| 43 | if (hc) { |
| 44 | LZ4_freeStreamHC(hc); |
| 45 | hc = nullptr; |
| 46 | } |
| 47 | }; |
| 48 | size_t compressed_offset = 0; |
| 49 | Blob block_buffer; |
| 50 | for (const auto& block : block_info) { |
| 51 | const auto uncompressed_block = |
| 52 | blob.substr(block.uncompressed_offset, block.uncompressed_length); |
| 53 | if (!block.IsCompressed()) { |
| 54 | TEST_EQ(sink(reinterpret_cast<const uint8_t*>(uncompressed_block.data()), |
| 55 | uncompressed_block.size()), |
| 56 | uncompressed_block.size()); |
| 57 | continue; |
| 58 | } |
| 59 | block_buffer.resize(block.compressed_length); |
| 60 | // Execute the increment at end of each loop |
| 61 | DEFER { |
| 62 | compressed_offset += block.compressed_length; |
| 63 | block_buffer.clear(); |
| 64 | }; |
| 65 | |
| 66 | int ret = 0; |
| 67 | // LZ4 spec enforces that last op of a compressed block must be an insert op |
| 68 | // of at least 5 bytes. Compressors will try to conform to that requirement |
| 69 | // if the input size is just right. We don't want that. So always give a |
| 70 | // little bit more data. |
| 71 | switch (int src_size = uncompressed_size - block.uncompressed_offset; |
| 72 | compression_algo.type()) { |
| 73 | case CompressionAlgorithm::LZ4HC: |
| 74 | ret = LZ4_compress_HC_destSize( |
| 75 | hc, |
| 76 | uncompressed_block.data(), |
| 77 | reinterpret_cast<char*>(block_buffer.data()), |
| 78 | &src_size, |
| 79 | block.compressed_length, |
| 80 | compression_algo.level()); |
| 81 | break; |
| 82 | case CompressionAlgorithm::LZ4: |
| 83 | ret = |
| 84 | LZ4_compress_destSize(uncompressed_block.data(), |
| 85 | reinterpret_cast<char*>(block_buffer.data()), |
| 86 | &src_size, |
| 87 | block.compressed_length); |
| 88 | break; |
| 89 | default: |
| 90 | LOG(ERROR) << "Unrecognized compression algorithm: " |
| 91 | << compression_algo.type(); |
| 92 | return {}; |
| 93 | } |
| 94 | TEST_GT(ret, 0); |
| 95 | const uint64_t bytes_written = ret; |
| 96 | // Last block may have trailing zeros |
| 97 | TEST_LE(bytes_written, block.compressed_length); |
| 98 | if (bytes_written < block.compressed_length) { |
| 99 | if (zero_padding_enabled) { |
| 100 | const auto padding = block.compressed_length - bytes_written; |
| 101 | std::memmove( |
| 102 | block_buffer.data() + padding, block_buffer.data(), bytes_written); |
| 103 | std::fill(block_buffer.data(), block_buffer.data() + padding, 0); |
| 104 | |
| 105 | } else { |
| 106 | std::fill(block_buffer.data() + bytes_written, |
| 107 | block_buffer.data() + block.compressed_length, |
| 108 | 0); |
| 109 | } |
| 110 | } |
| 111 | TEST_EQ(sink(block_buffer.data(), block_buffer.size()), |
| 112 | block_buffer.size()); |
| 113 | } |
| 114 | // Any trailing data will be copied to the output buffer. |
| 115 | TEST_EQ( |
| 116 | sink(reinterpret_cast<const uint8_t*>(blob.data()) + uncompressed_size, |
| 117 | blob.size() - uncompressed_size), |
| 118 | blob.size() - uncompressed_size); |
| 119 | return true; |
| 120 | } |
| 121 | |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 122 | Blob TryCompressBlob(std::string_view blob, |
| 123 | const std::vector<CompressedBlock>& block_info, |
| 124 | const bool zero_padding_enabled, |
| 125 | const CompressionAlgorithm compression_algo) { |
| 126 | size_t uncompressed_size = 0; |
| 127 | size_t compressed_size = 0; |
| 128 | for (const auto& block : block_info) { |
| 129 | CHECK_EQ(uncompressed_size, block.uncompressed_offset) |
| 130 | << "Compressed block info is expected to be sorted."; |
| 131 | uncompressed_size += block.uncompressed_length; |
| 132 | compressed_size += block.compressed_length; |
| 133 | } |
Kelvin Zhang | 760c334 | 2022-02-08 13:41:49 -0800 | [diff] [blame] | 134 | TEST_EQ(uncompressed_size, blob.size()); |
| 135 | Blob output; |
| 136 | output.reserve(utils::RoundUp(compressed_size, kBlockSize)); |
| 137 | if (!TryCompressBlob(blob, |
| 138 | block_info, |
| 139 | zero_padding_enabled, |
| 140 | compression_algo, |
| 141 | [&output](const uint8_t* data, size_t size) { |
| 142 | output.insert(output.end(), data, data + size); |
| 143 | return size; |
| 144 | })) { |
| 145 | return {}; |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 146 | } |
Kelvin Zhang | 760c334 | 2022-02-08 13:41:49 -0800 | [diff] [blame] | 147 | |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 148 | return output; |
| 149 | } |
| 150 | |
| 151 | Blob TryDecompressBlob(std::string_view blob, |
| 152 | const std::vector<CompressedBlock>& block_info, |
| 153 | const bool zero_padding_enabled) { |
| 154 | if (block_info.empty()) { |
| 155 | return {}; |
| 156 | } |
| 157 | size_t uncompressed_size = 0; |
| 158 | size_t compressed_size = 0; |
| 159 | for (const auto& block : block_info) { |
| 160 | CHECK_EQ(uncompressed_size, block.uncompressed_offset) |
| 161 | << " Compressed block info is expected to be sorted, expected offset " |
| 162 | << uncompressed_size << ", actual block " << block; |
| 163 | uncompressed_size += block.uncompressed_length; |
| 164 | compressed_size += block.compressed_length; |
| 165 | } |
| 166 | if (blob.size() < compressed_size) { |
Kelvin Zhang | 8389dfe | 2022-01-13 12:47:11 -0800 | [diff] [blame] | 167 | LOG(INFO) << "File is chunked. Skip lz4 decompress. Expected size: " |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 168 | << compressed_size << ", actual size: " << blob.size(); |
| 169 | return {}; |
| 170 | } |
| 171 | Blob output; |
| 172 | output.reserve(uncompressed_size); |
| 173 | size_t compressed_offset = 0; |
| 174 | for (const auto& block : block_info) { |
| 175 | std::string_view cluster = |
| 176 | blob.substr(compressed_offset, block.compressed_length); |
| 177 | if (!block.IsCompressed()) { |
| 178 | CHECK_NE(cluster.size(), 0UL); |
| 179 | output.insert(output.end(), cluster.begin(), cluster.end()); |
| 180 | compressed_offset += cluster.size(); |
| 181 | continue; |
| 182 | } |
| 183 | size_t inputmargin = 0; |
| 184 | if (zero_padding_enabled) { |
Greg Kaiser | 4203702 | 2022-01-04 08:48:18 -0800 | [diff] [blame] | 185 | while (inputmargin < std::min(kBlockSize, cluster.size()) && |
| 186 | cluster[inputmargin] == 0) { |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 187 | inputmargin++; |
| 188 | } |
| 189 | } |
| 190 | output.resize(output.size() + block.uncompressed_length); |
| 191 | |
| 192 | const auto bytes_decompressed = LZ4_decompress_safe_partial( |
| 193 | cluster.data() + inputmargin, |
| 194 | reinterpret_cast<char*>(output.data()) + output.size() - |
| 195 | block.uncompressed_length, |
| 196 | cluster.size() - inputmargin, |
| 197 | block.uncompressed_length, |
| 198 | block.uncompressed_length); |
| 199 | if (bytes_decompressed < 0) { |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 200 | LOG(FATAL) << "Failed to decompress, " << bytes_decompressed |
| 201 | << ", output_cursor = " |
| 202 | << output.size() - block.uncompressed_length |
| 203 | << ", input_cursor = " << compressed_offset |
| 204 | << ", blob.size() = " << blob.size() |
| 205 | << ", cluster_size = " << block.compressed_length |
| 206 | << ", dest capacity = " << block.uncompressed_length |
| 207 | << ", input margin = " << inputmargin << " " |
Kelvin Zhang | 760c334 | 2022-02-08 13:41:49 -0800 | [diff] [blame] | 208 | << HashCalculator::SHA256Digest(cluster) << " " |
| 209 | << HashCalculator::SHA256Digest(blob); |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 210 | return {}; |
| 211 | } |
| 212 | compressed_offset += block.compressed_length; |
| 213 | CHECK_EQ(static_cast<uint64_t>(bytes_decompressed), |
| 214 | block.uncompressed_length); |
| 215 | } |
| 216 | CHECK_EQ(output.size(), uncompressed_size); |
| 217 | |
| 218 | // Trailing data not recorded by compressed block info will be treated as |
| 219 | // uncompressed, most of the time these are xattrs or trailing zeros. |
| 220 | CHECK_EQ(blob.size(), compressed_offset) |
| 221 | << " Unexpected data the end of compressed data "; |
| 222 | if (compressed_offset < blob.size()) { |
| 223 | output.insert(output.end(), blob.begin() + compressed_offset, blob.end()); |
| 224 | } |
| 225 | |
| 226 | return output; |
| 227 | } |
| 228 | |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 229 | Blob TryDecompressBlob(const Blob& blob, |
| 230 | const std::vector<CompressedBlock>& block_info, |
| 231 | const bool zero_padding_enabled) { |
| 232 | return TryDecompressBlob( |
| 233 | ToStringView(blob), block_info, zero_padding_enabled); |
| 234 | } |
| 235 | |
| 236 | std::ostream& operator<<(std::ostream& out, const CompressedBlock& block) { |
| 237 | out << "CompressedBlock{.uncompressed_offset = " << block.uncompressed_offset |
| 238 | << ", .compressed_length = " << block.compressed_length |
| 239 | << ", .uncompressed_length = " << block.uncompressed_length << "}"; |
| 240 | return out; |
| 241 | } |
| 242 | |
Kelvin Zhang | 5562403 | 2021-12-20 12:13:24 -0800 | [diff] [blame] | 243 | std::ostream& operator<<(std::ostream& out, const CompressedBlockInfo& info) { |
| 244 | out << "BlockInfo { compressed_length: " << info.compressed_length() |
| 245 | << ", uncompressed_length: " << info.uncompressed_length() |
| 246 | << ", uncompressed_offset: " << info.uncompressed_offset(); |
| 247 | if (!info.sha256_hash().empty()) { |
| 248 | out << ", sha256_hash: " << HexEncode(info.sha256_hash()); |
| 249 | } |
| 250 | if (!info.postfix_bspatch().empty()) { |
| 251 | out << ", postfix_bspatch: " << info.postfix_bspatch().size(); |
| 252 | } |
| 253 | out << "}"; |
| 254 | return out; |
| 255 | } |
| 256 | |
| 257 | } // namespace chromeos_update_engine |