blob: 931026b11992a494816d1ead75759322bbc995ee [file] [log] [blame]
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Tianjie Xu25366072017-09-08 17:19:02 -070019import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080020import functools
Doug Zongker62338182014-09-08 08:29:55 -070021import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070022import itertools
23import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080028import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070029import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070030
Tao Bao3a2e3502016-12-28 09:14:39 -080031from collections import deque, OrderedDict
32from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
# Public API of this module; everything else is an implementation detail.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Return a binary patch that transforms srcfile into tgtfile.

  Uses imgdiff (zip-aware, '-z') when imgdiff is True, bsdiff otherwise.
  Raises ValueError carrying the tool's combined stdout/stderr on failure.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z']
  else:
    cmd = ['bsdiff']
  cmd += [srcfile, tgtfile, patchfile]

  # The command lines only contain temp filenames, so dumping them is not
  # useful for debugging; run quietly.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  tool_output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(tool_output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface that BlockImageDiff requires of an image object."""

  def RangeSha1(self, ranges):
    """Return (as a hex string) the SHA-1 of the data in 'ranges'."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Return the data in 'ranges' as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return (as a hex string) the SHA-1 over the whole care map."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file object 'fd'."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # No data at all, so every range hashes like the empty string.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    # Nothing to read.
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image.

    Args:
      data: the raw byte string backing the image.
      trim: if True, drop a trailing partial block.
      pad: if True, zero-fill a trailing partial block (mutually exclusive
          with trim).

    Raises:
      ValueError: if data is not block-aligned and neither trim nor pad is
          specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int even under Python 3
    # (or future-division) semantics; a float here would corrupt the
    # clobbered_blocks indices and the range() below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # NOTE(review): kept as a raw list (as before); RangeSet-consuming
    # callers such as care_map.subtract() appear to accept it — confirm
    # against rangelib.
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded final block) as zero/nonzero.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    # Generator yielding the raw bytes for each (start, end) block run.
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Return the hex SHA-1 of the data in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the whole image.

    Clobbered blocks are excluded unless include_clobbered_blocks is True.
    """
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file object 'fd'."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One transfer command: produce one target range set from a source."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style
    # True only when both range sets are monotonic (strictly increasing).
    self.intact = tgt_ranges.monotonic and src_ranges.monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register with the caller-supplied list; the position doubles as the
    # transfer id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only "diff"-style transfers may carry patch data; clearing (None) is
    # always allowed.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Return the net block count this transfer adds to the stash."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Turn this transfer into a plain "new" command, dropping source use."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
233
234
@functools.total_ordering
class HeapItem(object):
  """Heap entry that orders score-carrying items by descending score."""

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    """Detach the wrapped item; a cleared entry evaluates as falsy."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__ where Python 3 uses __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering derives the remaining rich comparisons from
  # __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
258
259
# BlockImageDiff works on two image objects.  An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet.  The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    RangeSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the specified range.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (ie, all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.
298
299class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700300 def __init__(self, tgt, src=None, threads=None, version=4,
301 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700302 if threads is None:
303 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700304 if threads == 0:
305 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700306 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700307 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700308 self.transfers = []
309 self.src_basenames = {}
310 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800311 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700312 self.touched_src_ranges = RangeSet()
313 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700314 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700315
Tao Bao8fad03e2017-03-01 14:36:26 -0800316 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700317
318 self.tgt = tgt
319 if src is None:
320 src = EmptyImage()
321 self.src = src
322
323 # The updater code that installs the patch always uses 4k blocks.
324 assert tgt.blocksize == 4096
325 assert src.blocksize == 4096
326
327 # The range sets in each filemap should comprise a partition of
328 # the care map.
329 self.AssertPartition(src.care_map, src.file_map.values())
330 self.AssertPartition(tgt.care_map, tgt.file_map.values())
331
  @property
  def max_stashed_size(self):
    # Peak number of bytes simultaneously stashed during the update, in
    # bytes; computed by WriteTransfers(), 0 before Compute() has run.
    return self._max_stashed_size
335
  def Compute(self, prefix):
    """Compute the transfer list and emit the output files.

    Runs the full pipeline: source-name abbreviation, transfer discovery,
    ordering, stash-size revision, validation, patch computation and
    serialization. Output files are written under the path 'prefix' (e.g.
    '<prefix>.new.dat' by ComputePatches() and '<prefix>.transfer.list' by
    WriteTransfers()).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
368
  def WriteTransfers(self, prefix):
    """Serialize the ordered transfers into '<prefix>.transfer.list'.

    Emits BBOTA v3+ command text (stash/free/new/zero/move/bsdiff/imgdiff/
    erase) and tracks the peak number of concurrently stashed blocks, which
    is stored in self._max_stashed_size (bytes) at the end.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit "stash" commands required before this transfer; a stash that is
      # already live (same hash) is only ref-counted, not re-stashed.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # Build the source spec ('src_str') in one of three forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>
      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        # Drop one reference; free the stash slot once nothing uses it.
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700575
  def ReviseStashSize(self):
    """Rewrite transfers so peak stash usage fits under the cache limit.

    Walks the transfers in order, simulating stash usage. Whenever storing a
    stash would exceed the allowed size (cache_size * stash_threshold, in
    blocks), the command that would consume that stash — or an overlapping
    "diff" command's implicit stash — is converted to a plain "new" command.
    Returns the number of target blocks that were repacked as new data.
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Only a brand-new hash consumes additional stash space.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700667
  def ComputePatches(self, prefix):
    """Computes patch data for all the transfers and writes the output files.

    Writes the raw data of "new" transfers to <prefix>.new.dat, turns
    hash-identical "diff" transfers into "move"s, diffs the remaining "diff"
    transfers in parallel (imgdiff where permissible, bsdiff otherwise), and
    concatenates the resulting patches into <prefix>.patch.dat, recording each
    transfer's patch offset/length on the Transfer object. Exits the process
    if any patch cannot be generated.

    Args:
      prefix: Path prefix for the output .new.dat / .patch.dat files.
    """
    print("Reticulating splines...")
    # Entries are (transfer index, use-imgdiff flag, patch index) triples.
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #   ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            if xf.patch:
              # We have already generated the patch with imgdiff. Check if the
              # transfer is intact.
              assert not self.disable_imgdiff
              imgdiff = True
              if not xf.intact:
                imgdiff = False
                xf.patch = None
            else:
              # For files in zip format (eg, APKs, JARs, etc.) we would
              # like to use imgdiff -z if possible (because it usually
              # produces significantly smaller patches than bsdiff).
              # This is permissible if:
              #
              #  - imgdiff is not disabled, and
              #  - the source and target files are monotonic (ie, the
              #    data is stored with blocks in increasing order), and
              #  - we haven't removed any blocks from the source set.
              #
              # If these conditions are satisfied then appending all the
              # blocks in the set together in order will produce a valid
              # zip file (plus possibly extra zeros in the last block),
              # which is what imgdiff needs to operate.  (imgdiff is
              # fine with extra zeros at the end of the file.)
              imgdiff = (not self.disable_imgdiff and xf.intact and
                         xf.tgt_name.split(".")[-1].lower()
                         in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      # patches[i] will be filled in (out of order) by the workers as
      # (transfer index, patch data) pairs.
      patches = [None] * diff_total
      error_messages = []
      warning_messages = []
      if sys.stdout.isatty():
        global diff_done
        diff_done = 0

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()
      def diff_worker():
        while True:
          with lock:
            if not diff_queue:
              return
            xf_index, imgdiff, patch_index = diff_queue.pop()

          xf = self.transfers[xf_index]
          patch = xf.patch
          if not patch:
            src_ranges = xf.src_ranges
            tgt_ranges = xf.tgt_ranges

            src_file = common.MakeTempFile(prefix="src-")
            with open(src_file, "wb") as fd:
              self.src.WriteRangeDataToFd(src_ranges, fd)

            tgt_file = common.MakeTempFile(prefix="tgt-")
            with open(tgt_file, "wb") as fd:
              self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

            message = []
            try:
              patch = compute_patch(src_file, tgt_file, imgdiff)
            except ValueError as e:
              message.append(
                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                      "imgdiff" if imgdiff else "bsdiff",
                      xf.tgt_name if xf.tgt_name == xf.src_name else
                          xf.tgt_name + " (from " + xf.src_name + ")",
                      xf.tgt_ranges, xf.src_ranges, e.message))
              # TODO(b/68016761): Better handle the holes in mke2fs created
              # images.
              if imgdiff:
                try:
                  patch = compute_patch(src_file, tgt_file, imgdiff=False)
                  message.append(
                      "Fell back and generated with bsdiff instead for %s" % (
                          xf.tgt_name,))
                  xf.style = "bsdiff"
                  with lock:
                    warning_messages.extend(message)
                    del message[:]
                except ValueError as e:
                  message.append(
                      "Also failed to generate with bsdiff for %s:\n%s" % (
                          xf.tgt_name, e.message))

            if message:
              with lock:
                error_messages.extend(message)

          with lock:
            patches[patch_index] = (xf_index, patch)
            if sys.stdout.isatty():
              global diff_done
              diff_done += 1
              progress = diff_done * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

      # Spawn the worker threads and wait until they drain diff_queue.
      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      if sys.stdout.isatty():
        print('\n')

      if warning_messages:
        print('WARNING:')
        print('\n'.join(warning_messages))
        print('\n\n\n')

      if error_messages:
        print('ERROR:')
        print('\n'.join(error_messages))
        print('\n\n\n')
        sys.exit(1)
    else:
      patches = []

    # Concatenate the patches in transfer order, recording each transfer's
    # offset/length within the combined .patch.dat.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
              xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
              xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")"),
              xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700867
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800868 def AssertSha1Good(self):
869 """Check the SHA-1 of the src & tgt blocks in the transfer list.
870
871 Double check the SHA-1 value to avoid the issue in b/71908713, where
872 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
873 environment. That specific problem has been fixed by protecting the
874 underlying generator function 'SparseImage._GetRangeData()' with lock.
875 """
876 for xf in self.transfers:
877 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
878 assert xf.tgt_sha1 == tgt_sha1
879 if xf.style == "diff":
880 src_sha1 = self.src.RangeSha1(xf.src_ranges)
881 assert xf.src_sha1 == src_sha1
882
Doug Zongkerfc44a512014-08-26 13:10:25 -0700883 def AssertSequenceGood(self):
884 # Simulate the sequences of transfers we will output, and check that:
885 # - we never read a block after writing it, and
886 # - we write every block we care about exactly once.
887
888 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800889 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700890
891 # Imagine processing the transfers in order.
892 for xf in self.transfers:
893 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700894
895 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800896 for _, sr in xf.use_stash:
897 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700898
Doug Zongker6ab2a502016-02-09 08:28:09 -0800899 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800900 # Source image could be larger. Don't check the blocks that are in the
901 # source image only. Since they are not in 'touched', and won't ever
902 # be touched.
903 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800904 assert touched[i] == 0
905
906 # Check that the output blocks for this transfer haven't yet
907 # been touched, and touch all the blocks written by this
908 # transfer.
909 for s, e in xf.tgt_ranges:
910 for i in range(s, e):
911 assert touched[i] == 0
912 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700913
914 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800915 for s, e in self.tgt.care_map:
916 for i in range(s, e):
917 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700918
Doug Zongker62338182014-09-08 08:29:55 -0700919 def ImproveVertexSequence(self):
920 print("Improving vertex order...")
921
922 # At this point our digraph is acyclic; we reversed any edges that
923 # were backwards in the heuristically-generated sequence. The
924 # previously-generated order is still acceptable, but we hope to
925 # find a better order that needs less memory for stashed data.
926 # Now we do a topological sort to generate a new vertex order,
927 # using a greedy algorithm to choose which vertex goes next
928 # whenever we have a choice.
929
930 # Make a copy of the edge set; this copy will get destroyed by the
931 # algorithm.
932 for xf in self.transfers:
933 xf.incoming = xf.goes_after.copy()
934 xf.outgoing = xf.goes_before.copy()
935
936 L = [] # the new vertex order
937
938 # S is the set of sources in the remaining graph; we always choose
939 # the one that leaves the least amount of stashed data after it's
940 # executed.
941 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
942 if not u.incoming]
943 heapq.heapify(S)
944
945 while S:
946 _, _, xf = heapq.heappop(S)
947 L.append(xf)
948 for u in xf.outgoing:
949 del u.incoming[xf]
950 if not u.incoming:
951 heapq.heappush(S, (u.NetStashChange(), u.order, u))
952
953 # if this fails then our graph had a cycle.
954 assert len(L) == len(self.transfers)
955
956 self.transfers = L
957 for i, xf in enumerate(L):
958 xf.order = i
959
Doug Zongkerfc44a512014-08-26 13:10:25 -0700960 def RemoveBackwardEdges(self):
961 print("Removing backward edges...")
962 in_order = 0
963 out_of_order = 0
964 lost_source = 0
965
966 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967 lost = 0
968 size = xf.src_ranges.size()
969 for u in xf.goes_before:
970 # xf should go before u
971 if xf.order < u.order:
972 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700973 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700974 else:
975 # it doesn't, boo. trim the blocks that u writes from xf's
976 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700977 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700978 assert xf.src_ranges.overlaps(u.tgt_ranges)
979 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
980 xf.intact = False
981
982 if xf.style == "diff" and not xf.src_ranges:
983 # nothing left to diff from; treat as new data
984 xf.style = "new"
985
986 lost = size - xf.src_ranges.size()
987 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700988
989 print((" %d/%d dependencies (%.2f%%) were violated; "
990 "%d source blocks removed.") %
991 (out_of_order, in_order + out_of_order,
992 (out_of_order * 100.0 / (in_order + out_of_order))
993 if (in_order + out_of_order) else 0.0,
994 lost_source))
995
Doug Zongker62338182014-09-08 08:29:55 -0700996 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800997 """Reverse unsatisfying edges and compute pairs of stashed blocks.
998
999 For each transfer, make sure it properly stashes the blocks it touches and
1000 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1001 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1002 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1003 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1004 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1005 blocks will be written to the same stash slot in WriteTransfers().
1006 """
1007
Doug Zongker62338182014-09-08 08:29:55 -07001008 print("Reversing backward edges...")
1009 in_order = 0
1010 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001011 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001012 stash_size = 0
1013
1014 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001015 for u in xf.goes_before.copy():
1016 # xf should go before u
1017 if xf.order < u.order:
1018 # it does, hurray!
1019 in_order += 1
1020 else:
1021 # it doesn't, boo. modify u to stash the blocks that it
1022 # writes that xf wants to read, and then require u to go
1023 # before xf.
1024 out_of_order += 1
1025
1026 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1027 assert overlap
1028
Tao Bao3a2e3502016-12-28 09:14:39 -08001029 u.stash_before.append((stash_raw_id, overlap))
1030 xf.use_stash.append((stash_raw_id, overlap))
1031 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001032 stash_size += overlap.size()
1033
1034 # reverse the edge direction; now xf must go after u
1035 del xf.goes_before[u]
1036 del u.goes_after[xf]
1037 xf.goes_after[u] = None # value doesn't matter
1038 u.goes_before[xf] = None
1039
1040 print((" %d/%d dependencies (%.2f%%) were violated; "
1041 "%d source blocks stashed.") %
1042 (out_of_order, in_order + out_of_order,
1043 (out_of_order * 100.0 / (in_order + out_of_order))
1044 if (in_order + out_of_order) else 0.0,
1045 stash_size))
1046
  def FindVertexSequence(self):
    """Orders the transfers to minimize the weight of removed dependencies.

    Produces an ordering of self.transfers (recorded in each transfer's
    'order' field and in the list order itself) such that the total weight of
    left-pointing edges -- dependencies that will have to be broken -- is
    heuristically minimized. Consumes each transfer's 'incoming'/'outgoing'
    edge copies, which are deleted before returning.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth.  Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left.  Left-pointing
    # edges will get removed to turn the digraph into a DAG.  In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-heap (via HeapItem) over the scores, so the "best" remaining
    # vertex can be found quickly.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Stale heap entries are only invalidated (cleared), never removed;
      # a fresh HeapItem with the updated score is pushed instead.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next.  "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until a live (non-cleared) heap entry for a vertex still in G
      # is found.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1156
1157 def GenerateDigraph(self):
1158 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001159
1160 # Each item of source_ranges will be:
1161 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001162 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001163 source_ranges = []
1164 for b in self.transfers:
1165 for s, e in b.src_ranges:
1166 if e > len(source_ranges):
1167 source_ranges.extend([None] * (e-len(source_ranges)))
1168 for i in range(s, e):
1169 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001170 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001171 else:
Tao Bao33482282016-10-24 16:49:08 -07001172 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001173
Doug Zongkerfc44a512014-08-26 13:10:25 -07001174 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001175 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001176 for s, e in a.tgt_ranges:
1177 for i in range(s, e):
1178 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001179 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1180 if source_ranges[i] is not None:
1181 for j in source_ranges[i]:
1182 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001183
1184 for b in intersections:
1185 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001186
1187 # If the blocks written by A are read by B, then B needs to go before A.
1188 i = a.tgt_ranges.intersect(b.src_ranges)
1189 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001190 if b.src_name == "__ZERO":
1191 # the cost of removing source blocks for the __ZERO domain
1192 # is (nearly) zero.
1193 size = 0
1194 else:
1195 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001196 b.goes_before[a] = size
1197 a.goes_after[b] = size
1198
1199 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001200 """Parse the file_map to generate all the transfers."""
1201
Tianjie Xu41390d42017-11-22 11:35:18 -08001202 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1203 src_ranges, style, by_id):
1204 """Add one or multiple Transfer()s by splitting large files.
1205
1206 For BBOTA v3, we need to stash source blocks for resumable feature.
1207 However, with the growth of file size and the shrink of the cache
1208 partition source blocks are too large to be stashed. If a file occupies
1209 too many blocks, we split it into smaller pieces by getting multiple
1210 Transfer()s.
1211
1212 The downside is that after splitting, we may increase the package size
1213 since the split pieces don't align well. According to our experiments,
1214 1/8 of the cache size as the per-piece limit appears to be optimal.
1215 Compared to the fixed 1024-block limit, it reduces the overall package
1216 size by 30% for volantis, and 20% for angler and bullhead."""
1217
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001218 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001219 while (tgt_ranges.size() > max_blocks_per_transfer and
1220 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001221 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1222 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001223 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1224 src_first = src_ranges.first(max_blocks_per_transfer)
1225
Tao Bao183e56e2017-03-05 17:05:09 -08001226 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1227 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1228 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001229
1230 tgt_ranges = tgt_ranges.subtract(tgt_first)
1231 src_ranges = src_ranges.subtract(src_first)
1232 pieces += 1
1233
1234 # Handle remaining blocks.
1235 if tgt_ranges.size() or src_ranges.size():
1236 # Must be both non-empty.
1237 assert tgt_ranges.size() and src_ranges.size()
1238 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1239 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001240 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1241 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1242 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001243
Tianjie Xu41390d42017-11-22 11:35:18 -08001244 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1245 by_id):
1246 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001247
Tianjie Xu41390d42017-11-22 11:35:18 -08001248 This function will construct a list of zip archives, which will later be
1249 split by imgdiff to reduce the final patch size. For the other files,
1250 we will plainly split them based on a fixed chunk size with the potential
1251 patch size penalty.
1252 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001253
1254 assert style == "diff"
1255
1256 # Change nothing for small files.
1257 if (tgt_ranges.size() <= max_blocks_per_transfer and
1258 src_ranges.size() <= max_blocks_per_transfer):
1259 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1260 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1261 style, by_id)
1262 return
1263
1264 if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
1265 split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
1266 tgt_ranges.monotonic)
1267 if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
1268 self.src.RangeSha1(src_ranges)):
1269 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1270 return
1271
Tianjie Xu41390d42017-11-22 11:35:18 -08001272 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1273 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001274
Tao Bao08c85832016-09-19 22:26:30 -07001275 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1276 split=False):
1277 """Wrapper function for adding a Transfer()."""
1278
1279 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1280 # otherwise add the Transfer() as is.
1281 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001282 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1283 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1284 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001285 return
1286
1287 # Handle .odex files specially to analyze the block-wise difference. If
1288 # most of the blocks are identical with only few changes (e.g. header),
1289 # we will patch the changed blocks only. This avoids stashing unchanged
1290 # blocks while patching. We limit the analysis to files without size
1291 # changes only. This is to avoid sacrificing the OTA generation cost too
1292 # much.
1293 if (tgt_name.split(".")[-1].lower() == 'odex' and
1294 tgt_ranges.size() == src_ranges.size()):
1295
1296 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1297 # few blocks remain identical, we lose the opportunity to use imgdiff
1298 # that may have better compression ratio than bsdiff.
1299 crop_threshold = 0.5
1300
1301 tgt_skipped = RangeSet()
1302 src_skipped = RangeSet()
1303 tgt_size = tgt_ranges.size()
1304 tgt_changed = 0
1305 for src_block, tgt_block in zip(src_ranges.next_item(),
1306 tgt_ranges.next_item()):
1307 src_rs = RangeSet(str(src_block))
1308 tgt_rs = RangeSet(str(tgt_block))
1309 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1310 tgt_skipped = tgt_skipped.union(tgt_rs)
1311 src_skipped = src_skipped.union(src_rs)
1312 else:
1313 tgt_changed += tgt_rs.size()
1314
1315 # Terminate early if no clear sign of benefits.
1316 if tgt_changed > tgt_size * crop_threshold:
1317 break
1318
1319 if tgt_changed < tgt_size * crop_threshold:
1320 assert tgt_changed + tgt_skipped.size() == tgt_size
1321 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
1322 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001323 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001324 "%s-skipped" % (tgt_name,),
1325 "%s-skipped" % (src_name,),
1326 tgt_skipped, src_skipped, style, by_id)
1327
1328 # Intentionally change the file extension to avoid being imgdiff'd as
1329 # the files are no longer in their original format.
1330 tgt_name = "%s-cropped" % (tgt_name,)
1331 src_name = "%s-cropped" % (src_name,)
1332 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1333 src_ranges = src_ranges.subtract(src_skipped)
1334
1335 # Possibly having no changed blocks.
1336 if not tgt_ranges:
1337 return
1338
1339 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001340 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001341 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1342
Tianjie Xu25366072017-09-08 17:19:02 -07001343 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1344 split_info):
1345 """Parse the split_info and return a list of info tuples.
1346
1347 Args:
1348 patch_size: total size of the patch file.
1349 tgt_ranges: Ranges of the target file within the original image.
1350 src_ranges: Ranges of the source file within the original image.
1351 split_info format:
1352 imgdiff version#
1353 count of pieces
1354 <patch_size_1> <tgt_size_1> <src_ranges_1>
1355 ...
1356 <patch_size_n> <tgt_size_n> <src_ranges_n>
1357
1358 Returns:
1359 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1360 """
1361
1362 version = int(split_info[0])
1363 assert version == 2
1364 count = int(split_info[1])
1365 assert len(split_info) - 2 == count
1366
1367 split_info_list = []
1368 patch_start = 0
1369 tgt_remain = copy.deepcopy(tgt_ranges)
1370 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1371 for line in split_info[2:]:
1372 info = line.split()
1373 assert len(info) == 3
1374 patch_length = int(info[0])
1375
1376 split_tgt_size = int(info[1])
1377 assert split_tgt_size % 4096 == 0
1378 assert split_tgt_size / 4096 <= tgt_remain.size()
1379 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1380 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1381
1382 # Find the split_src_ranges within the image file from its relative
1383 # position in file.
1384 split_src_indices = RangeSet.parse_raw(info[2])
1385 split_src_ranges = RangeSet()
1386 for r in split_src_indices:
1387 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1388 assert not split_src_ranges.overlaps(curr_range)
1389 split_src_ranges = split_src_ranges.union(curr_range)
1390
1391 split_info_list.append((patch_start, patch_length,
1392 split_tgt_ranges, split_src_ranges))
1393 patch_start += patch_length
1394
1395 # Check that the sizes of all the split pieces add up to the final file
1396 # size for patch and target.
1397 assert tgt_remain.size() == 0
1398 assert patch_start == patch_size
1399 return split_info_list
1400
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001401 def SplitLargeApks():
1402 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001403
1404 Example: Chrome.apk will be split into
1405 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1406 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1407 ...
1408
1409 After the split, the target pieces are continuous and block aligned; and
1410 the source pieces are mutually exclusive. During the split, we also
1411 generate and save the image patch between src-X & tgt-X. This patch will
1412 be valid because the block ranges of src-X & tgt-X will always stay the
1413 same afterwards; but there's a chance we don't use the patch if we
1414 convert the "diff" command into "new" or "move" later.
Tianjie Xu41390d42017-11-22 11:35:18 -08001415
1416 The split will be attempted by calling imgdiff, which expects the input
1417 files to be valid zip archives. If imgdiff fails for some reason (i.e.
1418 holes in the APK file), we will fall back to split the failed APKs into
1419 fixed size chunks.
Tianjie Xu25366072017-09-08 17:19:02 -07001420 """
1421
1422 while True:
1423 with transfer_lock:
1424 if not large_apks:
1425 return
1426 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1427
1428 src_file = common.MakeTempFile(prefix="src-")
1429 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001430 with open(src_file, "wb") as src_fd:
1431 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1432 with open(tgt_file, "wb") as tgt_fd:
1433 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001434
1435 patch_file = common.MakeTempFile(prefix="patch-")
1436 patch_info_file = common.MakeTempFile(prefix="split_info-")
1437 cmd = ["imgdiff", "-z",
1438 "--block-limit={}".format(max_blocks_per_transfer),
1439 "--split-info=" + patch_info_file,
1440 src_file, tgt_file, patch_file]
1441 p = common.Run(cmd, stdout=subprocess.PIPE)
1442 p.communicate()
Tianjie Xu25366072017-09-08 17:19:02 -07001443 if p.returncode != 0:
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001444 print("Failed to create patch between {} and {},"
1445 " falling back to bsdiff".format(src_name, tgt_name))
1446 with transfer_lock:
Tianjie Xu41390d42017-11-22 11:35:18 -08001447 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name,
1448 tgt_ranges, src_ranges,
1449 "diff", self.transfers)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001450 continue
Tianjie Xu25366072017-09-08 17:19:02 -07001451
1452 with open(patch_info_file) as patch_info:
1453 lines = patch_info.readlines()
1454
1455 patch_size_total = os.path.getsize(patch_file)
1456 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1457 tgt_ranges, src_ranges,
1458 lines)
1459 for index, (patch_start, patch_length, split_tgt_ranges,
1460 split_src_ranges) in enumerate(split_info_list):
1461 with open(patch_file) as f:
1462 f.seek(patch_start)
1463 patch_content = f.read(patch_length)
1464
1465 split_src_name = "{}-{}".format(src_name, index)
1466 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001467 split_large_apks.append((split_tgt_name,
1468 split_src_name,
1469 split_tgt_ranges,
1470 split_src_ranges,
1471 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001472
Tao Bao08c85832016-09-19 22:26:30 -07001473 print("Finding transfers...")
1474
Tianjie Xu25366072017-09-08 17:19:02 -07001475 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001476 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001477 cache_size = common.OPTIONS.cache_size
1478 split_threshold = 0.125
1479 max_blocks_per_transfer = int(cache_size * split_threshold /
1480 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001481 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001482 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001483 if tgt_fn == "__ZERO":
1484 # the special "__ZERO" domain is all the blocks not contained
1485 # in any file and that are filled with zeros. We have a
1486 # special transfer style for zero blocks.
1487 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001488 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1489 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001490 continue
1491
Tao Baoff777812015-05-12 11:42:31 -07001492 elif tgt_fn == "__COPY":
1493 # "__COPY" domain includes all the blocks not contained in any
1494 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001495 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001496 continue
1497
Doug Zongkerfc44a512014-08-26 13:10:25 -07001498 elif tgt_fn in self.src.file_map:
1499 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001500 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001501 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001502 continue
1503
1504 b = os.path.basename(tgt_fn)
1505 if b in self.src_basenames:
1506 # Look for an exact basename match in the source.
1507 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001508 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001509 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001510 continue
1511
1512 b = re.sub("[0-9]+", "#", b)
1513 if b in self.src_numpatterns:
1514 # Look for a 'number pattern' match (a basename match after
1515 # all runs of digits are replaced by "#"). (This is useful
1516 # for .so files that contain version numbers in the filename
1517 # that get bumped.)
1518 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001519 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001520 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001521 continue
1522
Tao Bao9a5caf22015-08-25 15:10:10 -07001523 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001524
Tianjie Xu25366072017-09-08 17:19:02 -07001525 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001526 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001527 for _ in range(self.threads)]
1528 for th in threads:
1529 th.start()
1530 while threads:
1531 threads.pop().join()
1532
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001533 # Sort the split transfers for large apks to generate a determinate package.
1534 split_large_apks.sort()
1535 for (tgt_name, src_name, tgt_ranges, src_ranges,
1536 patch) in split_large_apks:
1537 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1538 self.tgt.RangeSha1(tgt_ranges),
1539 self.src.RangeSha1(src_ranges),
1540 "diff", self.transfers)
1541 transfer_split.patch = patch
1542
Doug Zongkerfc44a512014-08-26 13:10:25 -07001543 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001544 for k in self.src.file_map.keys():
1545 b = os.path.basename(k)
1546 self.src_basenames[b] = k
1547 b = re.sub("[0-9]+", "#", b)
1548 self.src_numpatterns[b] = k
1549
1550 @staticmethod
1551 def AssertPartition(total, seq):
1552 """Assert that all the RangeSets in 'seq' form a partition of the
1553 'total' RangeSet (ie, they are nonintersecting and their union
1554 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001555
Doug Zongkerfc44a512014-08-26 13:10:25 -07001556 so_far = RangeSet()
1557 for i in seq:
1558 assert not so_far.overlaps(i)
1559 so_far = so_far.union(i)
1560 assert so_far == total