//
// Copyright (C) 2021 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

#include "lz4diff_compress.h"

#include "update_engine/common/utils.h"
#include "update_engine/common/hash_calculator.h"
#include "update_engine/payload_generator/delta_diff_generator.h"
#include "update_engine/payload_generator/payload_generation_config.h"

#include <base/logging.h>
#include <lz4.h>
#include <lz4hc.h>

namespace chromeos_update_engine {

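// Re-compresses |blob| so the result matches the layout described by
// |block_info|: each compressed block is re-compressed into at most
// |compressed_length| bytes at its recorded offset (and zero-padded up to
// that length when the result comes out shorter), while uncompressed blocks
// are copied through verbatim.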
Blob TryCompressBlob(std::string_view blob,
                     const std::vector<CompressedBlock>& block_info,
                     const bool zero_padding_enabled,
                     const CompressionAlgorithm compression_algo) {
  size_t uncompressed_size = 0;
  size_t compressed_size = 0;
  for (const auto& block : block_info) {
    CHECK_EQ(uncompressed_size, block.uncompressed_offset)
        << "Compressed block info is expected to be sorted.";
    uncompressed_size += block.uncompressed_length;
    compressed_size += block.compressed_length;
  }
  CHECK_EQ(uncompressed_size, blob.size());
  Blob output(utils::RoundUp(compressed_size, kBlockSize));
  auto hc = LZ4_createStreamHC();
  DEFER {
    if (hc) {
      LZ4_freeStreamHC(hc);
      hc = nullptr;
    }
  };
  size_t compressed_offset = 0;
  for (const auto& block : block_info) {
    // Execute the increment at the end of each loop iteration.
    DEFER { compressed_offset += block.compressed_length; };
    CHECK_LE(compressed_offset + block.compressed_length, output.size());

    if (!block.IsCompressed()) {
      std::memcpy(output.data() + compressed_offset,
                  blob.data() + block.uncompressed_offset,
                  block.compressed_length);
      continue;
    }
    // The LZ4 spec requires that the last op of a compressed block be an
    // insert op of at least 5 bytes. Compressors will try to conform to that
    // requirement if the input size is just right. We don't want that, so
    // always give a little bit more data.
    int src_size = uncompressed_size - block.uncompressed_offset;
    uint64_t bytes_written = 0;
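    // The *_destSize variants below compress as much of the input as fits
    // into |block.compressed_length| output bytes. On return, |src_size|
    // holds the number of input bytes actually consumed, and the return
    // value is the number of bytes written to the destination (0 on error).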
    switch (compression_algo.type()) {
      case CompressionAlgorithm::LZ4HC:
        bytes_written = LZ4_compress_HC_destSize(
            hc,
            blob.data() + block.uncompressed_offset,
            reinterpret_cast<char*>(output.data()) + compressed_offset,
            &src_size,
            block.compressed_length,
            compression_algo.level());
        break;
      case CompressionAlgorithm::LZ4:
        bytes_written = LZ4_compress_destSize(
            blob.data() + block.uncompressed_offset,
            reinterpret_cast<char*>(output.data()) + compressed_offset,
            &src_size,
            block.compressed_length);
        break;
      default:
        CHECK(false) << "Unrecognized compression algorithm: "
                     << compression_algo.type();
        break;
    }
    // Last block may have trailing zeros
    CHECK_LE(bytes_written, block.compressed_length);
    if (bytes_written < block.compressed_length) {
      if (zero_padding_enabled) {
        const auto padding = block.compressed_length - bytes_written;
        // LOG(INFO) << "Padding: " << padding;
        CHECK_LE(compressed_offset + padding + bytes_written, output.size());
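        // Shift the compressed bytes to the end of the cluster and zero-fill
        // the gap in front, so the padding precedes the compressed data; in
        // the branch below, the padding is placed after the data instead.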
        std::memmove(output.data() + compressed_offset + padding,
                     output.data() + compressed_offset,
                     bytes_written);
        CHECK_LE(compressed_offset + padding, output.size());
        std::fill(output.data() + compressed_offset,
                  output.data() + compressed_offset + padding,
                  0);

      } else {
        std::fill(output.data() + compressed_offset + bytes_written,
                  output.data() + compressed_offset + block.compressed_length,
                  0);
      }
    }
  }
  // Any trailing data will be copied to the output buffer.
  output.insert(output.end(), blob.begin() + uncompressed_size, blob.end());
  return output;
}

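// Decompresses |blob|, treated as a sequence of lz4 clusters laid out as
// described by |block_info|. Uncompressed clusters are copied through
// verbatim; with |zero_padding_enabled|, leading zero bytes of a cluster are
// skipped before decompression. Returns an empty Blob if |blob| is smaller
// than the recorded compressed size (chunked file); decompression failures
// are fatal.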
Blob TryDecompressBlob(std::string_view blob,
                       const std::vector<CompressedBlock>& block_info,
                       const bool zero_padding_enabled) {
  if (block_info.empty()) {
    return {};
  }
  size_t uncompressed_size = 0;
  size_t compressed_size = 0;
  for (const auto& block : block_info) {
    CHECK_EQ(uncompressed_size, block.uncompressed_offset)
        << " Compressed block info is expected to be sorted, expected offset "
        << uncompressed_size << ", actual block " << block;
    uncompressed_size += block.uncompressed_length;
    compressed_size += block.compressed_length;
  }
  if (blob.size() < compressed_size) {
    LOG(INFO) << "File is chunked. Skip lz4 decompress. Expected size: "
              << compressed_size << ", actual size: " << blob.size();
    return {};
  }
  Blob output;
  output.reserve(uncompressed_size);
  size_t compressed_offset = 0;
  for (const auto& block : block_info) {
    std::string_view cluster =
        blob.substr(compressed_offset, block.compressed_length);
    if (!block.IsCompressed()) {
      CHECK_NE(cluster.size(), 0UL);
      output.insert(output.end(), cluster.begin(), cluster.end());
      compressed_offset += cluster.size();
      continue;
    }
    size_t inputmargin = 0;
    if (zero_padding_enabled) {
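      // Skip any leading zero bytes: with zero padding enabled they are
      // padding placed in front of the compressed data (mirroring the padding
      // branch in TryCompressBlob above). The scan is capped at one block or
      // the cluster size, whichever is smaller.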
      while (inputmargin < std::min(kBlockSize, cluster.size()) &&
             cluster[inputmargin] == 0) {
        inputmargin++;
      }
    }
    output.resize(output.size() + block.uncompressed_length);

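    // LZ4_decompress_safe_partial(src, dst, srcSize, targetOutputSize,
    // dstCapacity) decodes up to |targetOutputSize| bytes into |dst| and
    // returns the number of bytes actually decoded, or a negative value on
    // error.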
    const auto bytes_decompressed = LZ4_decompress_safe_partial(
        cluster.data() + inputmargin,
        reinterpret_cast<char*>(output.data()) + output.size() -
            block.uncompressed_length,
        cluster.size() - inputmargin,
        block.uncompressed_length,
        block.uncompressed_length);
    if (bytes_decompressed < 0) {
      Blob cluster_hash;
      HashCalculator::RawHashOfBytes(
          cluster.data(), cluster.size(), &cluster_hash);
      Blob blob_hash;
      HashCalculator::RawHashOfBytes(blob.data(), blob.size(), &blob_hash);
      LOG(FATAL) << "Failed to decompress, " << bytes_decompressed
                 << ", output_cursor = "
                 << output.size() - block.uncompressed_length
                 << ", input_cursor = " << compressed_offset
                 << ", blob.size() = " << blob.size()
                 << ", cluster_size = " << block.compressed_length
                 << ", dest capacity = " << block.uncompressed_length
                 << ", input margin = " << inputmargin << " "
                 << HexEncode(cluster_hash) << " " << HexEncode(blob_hash);
      return {};
    }
    compressed_offset += block.compressed_length;
    CHECK_EQ(static_cast<uint64_t>(bytes_decompressed),
             block.uncompressed_length);
  }
  CHECK_EQ(output.size(), uncompressed_size);

  // Trailing data not recorded by compressed block info will be treated as
  // uncompressed; most of the time these are xattrs or trailing zeros.
  CHECK_EQ(blob.size(), compressed_offset)
      << " Unexpected data at the end of compressed data ";
  if (compressed_offset < blob.size()) {
    output.insert(output.end(), blob.begin() + compressed_offset, blob.end());
  }

  return output;
}

[[nodiscard]] std::string_view ToStringView(const Blob& blob) noexcept {
  return std::string_view{reinterpret_cast<const char*>(blob.data()),
                          blob.size()};
}

Blob TryDecompressBlob(const Blob& blob,
                       const std::vector<CompressedBlock>& block_info,
                       const bool zero_padding_enabled) {
  return TryDecompressBlob(
      ToStringView(blob), block_info, zero_padding_enabled);
}

std::ostream& operator<<(std::ostream& out, const CompressedBlock& block) {
  out << "CompressedBlock{.uncompressed_offset = " << block.uncompressed_offset
      << ", .compressed_length = " << block.compressed_length
      << ", .uncompressed_length = " << block.uncompressed_length << "}";
  return out;
}

[[nodiscard]] std::string_view ToStringView(const void* data,
                                            size_t size) noexcept {
  return std::string_view(reinterpret_cast<const char*>(data), size);
}

std::ostream& operator<<(std::ostream& out, const CompressedBlockInfo& info) {
  out << "BlockInfo { compressed_length: " << info.compressed_length()
      << ", uncompressed_length: " << info.uncompressed_length()
      << ", uncompressed_offset: " << info.uncompressed_offset();
  if (!info.sha256_hash().empty()) {
    out << ", sha256_hash: " << HexEncode(info.sha256_hash());
  }
  if (!info.postfix_bspatch().empty()) {
    out << ", postfix_bspatch: " << info.postfix_bspatch().size();
  }
  out << "}";
  return out;
}

}  // namespace chromeos_update_engine