# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Tianjie Xu25366072017-09-08 17:19:02 -070019import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080020import functools
Doug Zongker62338182014-09-08 08:29:55 -070021import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070022import itertools
23import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080028import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070029import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070030
Tao Bao3a2e3502016-12-28 09:14:39 -080031from collections import deque, OrderedDict
32from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070036__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Generate a binary patch that turns srcfile into tgtfile.

  Runs the external 'imgdiff -z' tool when imgdiff is True, otherwise
  'bsdiff', and returns the produced patch as a byte string.

  Raises:
    ValueError: if the external diff tool exits with a non-zero status;
        the tool's combined stdout/stderr is used as the message.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z']
  else:
    cmd = ['bsdiff']
  cmd += [srcfile, tgtfile, patchfile]

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  tool_output, _ = proc.communicate()
  if proc.returncode != 0:
    raise ValueError(tool_output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
58class Image(object):
Tao Bao183e56e2017-03-05 17:05:09 -080059 def RangeSha1(self, ranges):
60 raise NotImplementedError
61
Dan Albert8b72aef2015-03-23 19:13:21 -070062 def ReadRangeSet(self, ranges):
63 raise NotImplementedError
64
Tao Bao68658c02015-06-01 13:40:49 -070065 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070066 raise NotImplementedError
67
Tao Bao183e56e2017-03-05 17:05:09 -080068 def WriteRangeDataToFd(self, ranges, fd):
69 raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # Nothing to care about, clobber, or extend in an empty image.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data anywhere, so every range hashes like empty input.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so the
    # include_clobbered_blocks flag makes no difference either way.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image.

    Args:
      data: the raw image contents; must be a whole number of 4096-byte
          blocks unless trim or pad is given.
      trim: if True, drop a trailing partial block.
      pad: if True, zero-fill a trailing partial block. Mutually exclusive
          with trim.

    Raises:
      ValueError: if len(data) is not block-aligned and neither trim nor
          pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int under Python 3 (or
    # with "from __future__ import division"); true division would yield a
    # float, which breaks range() and RangeSet below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # NOTE(review): clobbered_blocks is stored as a flat list here, but
    # TotalSha1() passes it to care_map.subtract(), which presumably
    # expects a RangeSet — verify against rangelib.
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each (non-padded) block as all-zero or not; the lists hold
    # flattened (start, end) pairs in the form RangeSet's data= expects.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yield the raw data for each (start, end) block range in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Return (as a hex string) the SHA-1 of the data in 'ranges'."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    # Returned as a one-element list holding a generator, per the
    # "list/tuple of strings" Image contract.
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return (as a hex string) the SHA-1 over the image.

    Excludes the clobbered (padded) blocks by default; hashes every byte
    of self.data when include_clobbered_blocks is True.
    """
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file object 'fd'."""
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One update command: produce tgt_ranges from src_ranges via 'style'.

  Each instance registers itself in 'by_id' (its index there becomes its
  id) and carries the ordering/stash bookkeeping that BlockImageDiff
  fills in later.
  """

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style
    # Intact only when both range sets report themselves monotonic.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only "diff"-style transfers may carry patch data; clearing with a
    # falsy value is always allowed.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Blocks this transfer newly stashes minus blocks it releases."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    released = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - released

  def ConvertToNew(self):
    """Downgrade this transfer to a plain "new" command with no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
234
235
@functools.total_ordering
class HeapItem(object):
  """Wrapper that lets Transfer objects live in a heapq min-heap.

  Heap entries are invalidated lazily via clear(); truthiness tells the
  consumer whether the wrapped item is still live.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    # Truthy while the wrapped item is still live. (The previous version
    # returned `self.item is None`, inverting the check; the bug was
    # masked because Python 2 consults __nonzero__, not __bool__.)
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing, while Python 3 uses
  # __bool__; alias so both see the same behavior.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
251
252
# BlockImageDiff works on two image objects.  An image object is
# anything that provides the following attributes:
#
#    blocksize: the size in bytes of a block, currently must be 4096.
#
#    total_blocks: the total size of the partition/image, in blocks.
#
#    care_map: a RangeSet containing which blocks (in the range [0,
#      total_blocks) we actually care about; i.e. which blocks contain
#      data.
#
#    file_map: a dict that partitions the blocks contained in care_map
#      into smaller domains that are useful for doing diffs on.
#      (Typically a domain is a file, and the key in file_map is the
#      pathname.)
#
#    clobbered_blocks: a RangeSet containing which blocks contain data
#      but may be altered by the FS. They need to be excluded when
#      verifying the partition integrity.
#
#    ReadRangeSet(): a function that takes a RangeSet and returns the
#      data contained in the image blocks of that RangeSet.  The data
#      is returned as a list or tuple of strings; concatenating the
#      elements together should produce the requested data.
#      Implementations are free to break up the data into list/tuple
#      elements in any way that is convenient.
#
#    RangeSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the specified range.
#
#    TotalSha1(): a function that returns (as a hex string) the SHA-1
#      hash of all the data in the image (ie, all the blocks in the
#      care_map minus clobbered_blocks, or including the clobbered
#      blocks if include_clobbered_blocks is True).
#
# When creating a BlockImageDiff, the src image may be None, in which
# case the list of transfers produced will never read from the
# original image.
291
292class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700293 def __init__(self, tgt, src=None, threads=None, version=4,
294 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700295 if threads is None:
296 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700297 if threads == 0:
298 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700299 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700300 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700301 self.transfers = []
302 self.src_basenames = {}
303 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800304 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700305 self.touched_src_ranges = RangeSet()
306 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700307 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700308
Tao Bao8fad03e2017-03-01 14:36:26 -0800309 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700310
311 self.tgt = tgt
312 if src is None:
313 src = EmptyImage()
314 self.src = src
315
316 # The updater code that installs the patch always uses 4k blocks.
317 assert tgt.blocksize == 4096
318 assert src.blocksize == 4096
319
320 # The range sets in each filemap should comprise a partition of
321 # the care map.
322 self.AssertPartition(src.care_map, src.file_map.values())
323 self.AssertPartition(tgt.care_map, tgt.file_map.values())
324
  @property
  def max_stashed_size(self):
    """Peak stash usage in bytes; set by WriteTransfers(), 0 before then."""
    return self._max_stashed_size
328
  def Compute(self, prefix):
    """Compute the transfer list and write all output files.

    Produces "<prefix>.transfer.list" (via WriteTransfers) plus the
    patch/new-data files (via ComputePatches).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
360
  def WriteTransfers(self, prefix):
    """Write the ordered transfer list to "<prefix>.transfer.list".

    Emits one command per line (stash/free/new/zero/move/bsdiff/imgdiff/
    erase), tracks peak stash usage, and records which source ranges the
    update touches (self.touched_src_ranges / self.touched_src_sha1).
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          # Identical data is already stashed; just bump the ref count.
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      # 'free' commands generated by this transfer; appended after the
      # command itself, and their sizes subtracted from stashed_blocks.
      free_string = []
      free_size = 0

      # Build src_str, the source specifier of the command:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        # Express the stash's destination relative to the command's source.
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference: free the slot after this command runs.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        # Everything comes from stashes.
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # Command formats, version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op: emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    # Four header lines precede the commands.
    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700567
  def ReviseStashSize(self):
    """Revise the plan so runtime stash usage fits under the cache limit.

    Walks the transfers in order, simulating stash usage; whenever a stash
    would push usage past max_allowed blocks, the command that would use
    that stash (or the command itself, for implicit diff stashes) is
    converted to a plain "new" command.

    Returns:
      The number of target blocks converted to "new" commands.
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        # Identical data already stashed costs nothing extra.
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700659
  def ComputePatches(self, prefix):
    """Computes patches for all "diff" transfers and emits the data files.

    Writes the raw data of "new" transfers to "<prefix>.new.dat" and the
    concatenated patch blobs to "<prefix>.patch.dat", recording each
    transfer's patch_start / patch_len. A "diff" transfer whose source and
    target SHA-1s match is converted to "move" (no patch needed); the rest
    become "imgdiff" or "bsdiff" and are diffed by a pool of worker
    threads. If any patch cannot be generated (even after falling back
    from imgdiff to bsdiff), the accumulated errors are printed and the
    process terminates via sys.exit(1).

    Args:
      prefix: path prefix for the output data files.
    """
    print("Reticulating splines...")
    # Each entry: (index into self.transfers, use imgdiff?, slot in the
    # 'patches' result list).
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
              str(xf.tgt_ranges)))

        elif xf.style == "new":
          # "new" data is emitted verbatim into the .new.dat stream.
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s" % (
              tgt_size, tgt_size, 100.0, xf.style,
              xf.tgt_name, str(xf.tgt_ranges)))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          # ["he", "llo"] vs ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
                  tgt_size, tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges)))
          else:
            if xf.patch:
              # We have already generated the patch with imgdiff. Check if the
              # transfer is intact.
              # (NOTE(review): pre-computed patches presumably come from the
              # large-APK splitting path in FindTransfers — confirm.)
              assert not self.disable_imgdiff
              imgdiff = True
              if not xf.intact:
                imgdiff = False
                xf.patch = None
            else:
              # For files in zip format (eg, APKs, JARs, etc.) we would
              # like to use imgdiff -z if possible (because it usually
              # produces significantly smaller patches than bsdiff).
              # This is permissible if:
              #
              # - imgdiff is not disabled, and
              # - the source and target files are monotonic (ie, the
              # data is stored with blocks in increasing order), and
              # - we haven't removed any blocks from the source set.
              #
              # If these conditions are satisfied then appending all the
              # blocks in the set together in order will produce a valid
              # zip file (plus possibly extra zeros in the last block),
              # which is what imgdiff needs to operate. (imgdiff is
              # fine with extra zeros at the end of the file.)
              imgdiff = (not self.disable_imgdiff and xf.intact and
                         xf.tgt_name.split(".")[-1].lower()
                         in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_queue:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")

      diff_total = len(diff_queue)
      # Results are stored by pre-assigned slot so the output order is
      # deterministic regardless of which worker finishes first.
      patches = [None] * diff_total
      error_messages = []
      warning_messages = []
      if sys.stdout.isatty():
        # Progress counter shared by the worker threads; declared as a
        # module-level global so the nested diff_worker can update it.
        global diff_done
        diff_done = 0

      # Using multiprocessing doesn't give additional benefits, due to the
      # pattern of the code. The diffing work is done by subprocess.call, which
      # already runs in a separate process (not affected much by the GIL -
      # Global Interpreter Lock). Using multiprocess also requires either a)
      # writing the diff input files in the main process before forking, or b)
      # reopening the image file (SparseImage) in the worker processes. Doing
      # neither of them further improves the performance.
      lock = threading.Lock()
      def diff_worker():
        # Worker loop: pull one queued diff at a time (from the tail of
        # diff_queue) until the queue is drained.
        while True:
          with lock:
            if not diff_queue:
              return
            xf_index, imgdiff, patch_index = diff_queue.pop()

          xf = self.transfers[xf_index]
          patch = xf.patch
          if not patch:
            src_ranges = xf.src_ranges
            tgt_ranges = xf.tgt_ranges

            # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
            with lock:
              src_file = common.MakeTempFile(prefix="src-")
              with open(src_file, "wb") as fd:
                self.src.WriteRangeDataToFd(src_ranges, fd)

              tgt_file = common.MakeTempFile(prefix="tgt-")
              with open(tgt_file, "wb") as fd:
                self.tgt.WriteRangeDataToFd(tgt_ranges, fd)

            message = []
            try:
              patch = compute_patch(src_file, tgt_file, imgdiff)
            except ValueError as e:
              # NOTE(review): e.message is Python 2 only; str(e) would be
              # needed under Python 3.
              message.append(
                  "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                      "imgdiff" if imgdiff else "bsdiff",
                      xf.tgt_name if xf.tgt_name == xf.src_name else
                          xf.tgt_name + " (from " + xf.src_name + ")",
                      xf.tgt_ranges, xf.src_ranges, e.message))
              # TODO(b/68016761): Better handle the holes in mke2fs created
              # images.
              if imgdiff:
                # imgdiff failed: retry with bsdiff and downgrade the
                # failure to a warning if that succeeds.
                try:
                  patch = compute_patch(src_file, tgt_file, imgdiff=False)
                  message.append(
                      "Fell back and generated with bsdiff instead for %s" % (
                          xf.tgt_name,))
                  xf.style = "bsdiff"
                  with lock:
                    warning_messages.extend(message)
                    del message[:]
                except ValueError as e:
                  message.append(
                      "Also failed to generate with bsdiff for %s:\n%s" % (
                          xf.tgt_name, e.message))

            if message:
              with lock:
                error_messages.extend(message)

          with lock:
            patches[patch_index] = (xf_index, patch)
            if sys.stdout.isatty():
              global diff_done
              diff_done += 1
              progress = diff_done * 100 / diff_total
              # '\033[K' is to clear to EOL.
              print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
              sys.stdout.flush()

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()

      if sys.stdout.isatty():
        print('\n')

      if warning_messages:
        print('WARNING:')
        print('\n'.join(warning_messages))
        print('\n\n\n')

      if error_messages:
        print('ERROR:')
        print('\n'.join(error_messages))
        print('\n\n\n')
        sys.exit(1)
    else:
      patches = []

    # Concatenate all patches into .patch.dat, recording each transfer's
    # offset and length within that stream.
    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch)

        if common.OPTIONS.verbose:
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
              xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
              xf.style,
              xf.tgt_name if xf.tgt_name == xf.src_name else (
                  xf.tgt_name + " (from " + xf.src_name + ")"),
              xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700861
862 def AssertSequenceGood(self):
863 # Simulate the sequences of transfers we will output, and check that:
864 # - we never read a block after writing it, and
865 # - we write every block we care about exactly once.
866
867 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800868 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700869
870 # Imagine processing the transfers in order.
871 for xf in self.transfers:
872 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700873
874 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800875 for _, sr in xf.use_stash:
876 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700877
Doug Zongker6ab2a502016-02-09 08:28:09 -0800878 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800879 # Source image could be larger. Don't check the blocks that are in the
880 # source image only. Since they are not in 'touched', and won't ever
881 # be touched.
882 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800883 assert touched[i] == 0
884
885 # Check that the output blocks for this transfer haven't yet
886 # been touched, and touch all the blocks written by this
887 # transfer.
888 for s, e in xf.tgt_ranges:
889 for i in range(s, e):
890 assert touched[i] == 0
891 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700892
893 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800894 for s, e in self.tgt.care_map:
895 for i in range(s, e):
896 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700897
Doug Zongker62338182014-09-08 08:29:55 -0700898 def ImproveVertexSequence(self):
899 print("Improving vertex order...")
900
901 # At this point our digraph is acyclic; we reversed any edges that
902 # were backwards in the heuristically-generated sequence. The
903 # previously-generated order is still acceptable, but we hope to
904 # find a better order that needs less memory for stashed data.
905 # Now we do a topological sort to generate a new vertex order,
906 # using a greedy algorithm to choose which vertex goes next
907 # whenever we have a choice.
908
909 # Make a copy of the edge set; this copy will get destroyed by the
910 # algorithm.
911 for xf in self.transfers:
912 xf.incoming = xf.goes_after.copy()
913 xf.outgoing = xf.goes_before.copy()
914
915 L = [] # the new vertex order
916
917 # S is the set of sources in the remaining graph; we always choose
918 # the one that leaves the least amount of stashed data after it's
919 # executed.
920 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
921 if not u.incoming]
922 heapq.heapify(S)
923
924 while S:
925 _, _, xf = heapq.heappop(S)
926 L.append(xf)
927 for u in xf.outgoing:
928 del u.incoming[xf]
929 if not u.incoming:
930 heapq.heappush(S, (u.NetStashChange(), u.order, u))
931
932 # if this fails then our graph had a cycle.
933 assert len(L) == len(self.transfers)
934
935 self.transfers = L
936 for i, xf in enumerate(L):
937 xf.order = i
938
Doug Zongkerfc44a512014-08-26 13:10:25 -0700939 def RemoveBackwardEdges(self):
940 print("Removing backward edges...")
941 in_order = 0
942 out_of_order = 0
943 lost_source = 0
944
945 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700946 lost = 0
947 size = xf.src_ranges.size()
948 for u in xf.goes_before:
949 # xf should go before u
950 if xf.order < u.order:
951 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700952 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700953 else:
954 # it doesn't, boo. trim the blocks that u writes from xf's
955 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700956 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700957 assert xf.src_ranges.overlaps(u.tgt_ranges)
958 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
959 xf.intact = False
960
961 if xf.style == "diff" and not xf.src_ranges:
962 # nothing left to diff from; treat as new data
963 xf.style = "new"
964
965 lost = size - xf.src_ranges.size()
966 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967
968 print((" %d/%d dependencies (%.2f%%) were violated; "
969 "%d source blocks removed.") %
970 (out_of_order, in_order + out_of_order,
971 (out_of_order * 100.0 / (in_order + out_of_order))
972 if (in_order + out_of_order) else 0.0,
973 lost_source))
974
Doug Zongker62338182014-09-08 08:29:55 -0700975 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800976 """Reverse unsatisfying edges and compute pairs of stashed blocks.
977
978 For each transfer, make sure it properly stashes the blocks it touches and
979 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
980 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
981 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
982 it is possible to have multiple pairs with different 'stash_raw_id's. Each
983 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
984 blocks will be written to the same stash slot in WriteTransfers().
985 """
986
Doug Zongker62338182014-09-08 08:29:55 -0700987 print("Reversing backward edges...")
988 in_order = 0
989 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -0800990 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -0700991 stash_size = 0
992
993 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700994 for u in xf.goes_before.copy():
995 # xf should go before u
996 if xf.order < u.order:
997 # it does, hurray!
998 in_order += 1
999 else:
1000 # it doesn't, boo. modify u to stash the blocks that it
1001 # writes that xf wants to read, and then require u to go
1002 # before xf.
1003 out_of_order += 1
1004
1005 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1006 assert overlap
1007
Tao Bao3a2e3502016-12-28 09:14:39 -08001008 u.stash_before.append((stash_raw_id, overlap))
1009 xf.use_stash.append((stash_raw_id, overlap))
1010 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001011 stash_size += overlap.size()
1012
1013 # reverse the edge direction; now xf must go after u
1014 del xf.goes_before[u]
1015 del u.goes_after[xf]
1016 xf.goes_after[u] = None # value doesn't matter
1017 u.goes_before[xf] = None
1018
1019 print((" %d/%d dependencies (%.2f%%) were violated; "
1020 "%d source blocks stashed.") %
1021 (out_of_order, in_order + out_of_order,
1022 (out_of_order * 100.0 / (in_order + out_of_order))
1023 if (in_order + out_of_order) else 0.0,
1024 stash_size))
1025
  def FindVertexSequence(self):
    """Chooses a transfer order that (heuristically) minimizes lost edges.

    Arranges the transfer digraph's vertices on a line, trying to keep
    the total weight of left-pointing (i.e. violated) edges small; those
    edges are later removed/reversed to make the graph a DAG. On return,
    self.transfers is reordered and each transfer's 'order' field is set
    to its position in the new sequence.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight of outgoing edges) - (weight of incoming edges);
      # the higher the score, the more this vertex wants to be a source.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-score heap of candidate vertices. HeapItem is defined earlier
    # in this file; stale entries are invalidated via clear() and skipped
    # by the truthiness test when popped.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Lazy heap update: instead of re-sorting, invalidate the old entry
      # and push a fresh one carrying the new score.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until we find a live (non-cleared) entry whose vertex is
      # still in G.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1135
1136 def GenerateDigraph(self):
1137 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001138
1139 # Each item of source_ranges will be:
1140 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001141 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001142 source_ranges = []
1143 for b in self.transfers:
1144 for s, e in b.src_ranges:
1145 if e > len(source_ranges):
1146 source_ranges.extend([None] * (e-len(source_ranges)))
1147 for i in range(s, e):
1148 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001149 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001150 else:
Tao Bao33482282016-10-24 16:49:08 -07001151 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001152
Doug Zongkerfc44a512014-08-26 13:10:25 -07001153 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001154 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001155 for s, e in a.tgt_ranges:
1156 for i in range(s, e):
1157 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001158 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1159 if source_ranges[i] is not None:
1160 for j in source_ranges[i]:
1161 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001162
1163 for b in intersections:
1164 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001165
1166 # If the blocks written by A are read by B, then B needs to go before A.
1167 i = a.tgt_ranges.intersect(b.src_ranges)
1168 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001169 if b.src_name == "__ZERO":
1170 # the cost of removing source blocks for the __ZERO domain
1171 # is (nearly) zero.
1172 size = 0
1173 else:
1174 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001175 b.goes_before[a] = size
1176 a.goes_after[b] = size
1177
1178 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001179 """Parse the file_map to generate all the transfers."""
1180
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001181 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1182 by_id):
1183 """Add one or multiple Transfer()s by splitting large files."""
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001184 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001185 while (tgt_ranges.size() > max_blocks_per_transfer and
1186 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001187 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1188 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001189 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1190 src_first = src_ranges.first(max_blocks_per_transfer)
1191
Tao Bao183e56e2017-03-05 17:05:09 -08001192 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1193 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1194 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001195
1196 tgt_ranges = tgt_ranges.subtract(tgt_first)
1197 src_ranges = src_ranges.subtract(src_first)
1198 pieces += 1
1199
1200 # Handle remaining blocks.
1201 if tgt_ranges.size() or src_ranges.size():
1202 # Must be both non-empty.
1203 assert tgt_ranges.size() and src_ranges.size()
1204 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1205 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001206 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1207 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1208 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001209
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001210 def FindZipsAndAddSplitTransfers(tgt_name, src_name, tgt_ranges,
1211 src_ranges, style, by_id):
1212 """Find all the zip archives and add split transfers for the other files.
1213
1214 For BBOTA v3, we need to stash source blocks for resumable feature.
1215 However, with the growth of file size and the shrink of the cache
1216 partition source blocks are too large to be stashed. If a file occupies
1217 too many blocks, we split it into smaller pieces by getting multiple
1218 Transfer()s.
1219
1220 The downside is that after splitting, we may increase the package size
1221 since the split pieces don't align well. According to our experiments,
1222 1/8 of the cache size as the per-piece limit appears to be optimal.
1223 Compared to the fixed 1024-block limit, it reduces the overall package
1224 size by 30% for volantis, and 20% for angler and bullhead."""
1225
1226 assert style == "diff"
1227
1228 # Change nothing for small files.
1229 if (tgt_ranges.size() <= max_blocks_per_transfer and
1230 src_ranges.size() <= max_blocks_per_transfer):
1231 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1232 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1233 style, by_id)
1234 return
1235
1236 if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
1237 split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
1238 tgt_ranges.monotonic)
1239 if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
1240 self.src.RangeSha1(src_ranges)):
1241 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1242 return
1243
1244 AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
1245 style, by_id)
1246
Tao Bao08c85832016-09-19 22:26:30 -07001247 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1248 split=False):
1249 """Wrapper function for adding a Transfer()."""
1250
1251 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1252 # otherwise add the Transfer() as is.
1253 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001254 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1255 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1256 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001257 return
1258
1259 # Handle .odex files specially to analyze the block-wise difference. If
1260 # most of the blocks are identical with only few changes (e.g. header),
1261 # we will patch the changed blocks only. This avoids stashing unchanged
1262 # blocks while patching. We limit the analysis to files without size
1263 # changes only. This is to avoid sacrificing the OTA generation cost too
1264 # much.
1265 if (tgt_name.split(".")[-1].lower() == 'odex' and
1266 tgt_ranges.size() == src_ranges.size()):
1267
1268 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1269 # few blocks remain identical, we lose the opportunity to use imgdiff
1270 # that may have better compression ratio than bsdiff.
1271 crop_threshold = 0.5
1272
1273 tgt_skipped = RangeSet()
1274 src_skipped = RangeSet()
1275 tgt_size = tgt_ranges.size()
1276 tgt_changed = 0
1277 for src_block, tgt_block in zip(src_ranges.next_item(),
1278 tgt_ranges.next_item()):
1279 src_rs = RangeSet(str(src_block))
1280 tgt_rs = RangeSet(str(tgt_block))
1281 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1282 tgt_skipped = tgt_skipped.union(tgt_rs)
1283 src_skipped = src_skipped.union(src_rs)
1284 else:
1285 tgt_changed += tgt_rs.size()
1286
1287 # Terminate early if no clear sign of benefits.
1288 if tgt_changed > tgt_size * crop_threshold:
1289 break
1290
1291 if tgt_changed < tgt_size * crop_threshold:
1292 assert tgt_changed + tgt_skipped.size() == tgt_size
1293 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
1294 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001295 FindZipsAndAddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001296 "%s-skipped" % (tgt_name,),
1297 "%s-skipped" % (src_name,),
1298 tgt_skipped, src_skipped, style, by_id)
1299
1300 # Intentionally change the file extension to avoid being imgdiff'd as
1301 # the files are no longer in their original format.
1302 tgt_name = "%s-cropped" % (tgt_name,)
1303 src_name = "%s-cropped" % (src_name,)
1304 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1305 src_ranges = src_ranges.subtract(src_skipped)
1306
1307 # Possibly having no changed blocks.
1308 if not tgt_ranges:
1309 return
1310
1311 # Add the transfer(s).
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001312 FindZipsAndAddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001313 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1314
Tianjie Xu25366072017-09-08 17:19:02 -07001315 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1316 split_info):
1317 """Parse the split_info and return a list of info tuples.
1318
1319 Args:
1320 patch_size: total size of the patch file.
1321 tgt_ranges: Ranges of the target file within the original image.
1322 src_ranges: Ranges of the source file within the original image.
1323 split_info format:
1324 imgdiff version#
1325 count of pieces
1326 <patch_size_1> <tgt_size_1> <src_ranges_1>
1327 ...
1328 <patch_size_n> <tgt_size_n> <src_ranges_n>
1329
1330 Returns:
1331 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1332 """
1333
1334 version = int(split_info[0])
1335 assert version == 2
1336 count = int(split_info[1])
1337 assert len(split_info) - 2 == count
1338
1339 split_info_list = []
1340 patch_start = 0
1341 tgt_remain = copy.deepcopy(tgt_ranges)
1342 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1343 for line in split_info[2:]:
1344 info = line.split()
1345 assert len(info) == 3
1346 patch_length = int(info[0])
1347
1348 split_tgt_size = int(info[1])
1349 assert split_tgt_size % 4096 == 0
1350 assert split_tgt_size / 4096 <= tgt_remain.size()
1351 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1352 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1353
1354 # Find the split_src_ranges within the image file from its relative
1355 # position in file.
1356 split_src_indices = RangeSet.parse_raw(info[2])
1357 split_src_ranges = RangeSet()
1358 for r in split_src_indices:
1359 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1360 assert not split_src_ranges.overlaps(curr_range)
1361 split_src_ranges = split_src_ranges.union(curr_range)
1362
1363 split_info_list.append((patch_start, patch_length,
1364 split_tgt_ranges, split_src_ranges))
1365 patch_start += patch_length
1366
1367 # Check that the sizes of all the split pieces add up to the final file
1368 # size for patch and target.
1369 assert tgt_remain.size() == 0
1370 assert patch_start == patch_size
1371 return split_info_list
1372
1373 def AddSplitTransferForLargeApks():
1374 """Create split transfers for large apk files.
1375
1376 Example: Chrome.apk will be split into
1377 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1378 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1379 ...
1380
1381 After the split, the target pieces are continuous and block aligned; and
1382 the source pieces are mutually exclusive. During the split, we also
1383 generate and save the image patch between src-X & tgt-X. This patch will
1384 be valid because the block ranges of src-X & tgt-X will always stay the
1385 same afterwards; but there's a chance we don't use the patch if we
1386 convert the "diff" command into "new" or "move" later.
1387 """
1388
1389 while True:
1390 with transfer_lock:
1391 if not large_apks:
1392 return
1393 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1394
1395 src_file = common.MakeTempFile(prefix="src-")
1396 tgt_file = common.MakeTempFile(prefix="tgt-")
1397 with transfer_lock:
1398 with open(src_file, "wb") as src_fd:
1399 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1400 with open(tgt_file, "wb") as tgt_fd:
1401 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
1402
1403 patch_file = common.MakeTempFile(prefix="patch-")
1404 patch_info_file = common.MakeTempFile(prefix="split_info-")
1405 cmd = ["imgdiff", "-z",
1406 "--block-limit={}".format(max_blocks_per_transfer),
1407 "--split-info=" + patch_info_file,
1408 src_file, tgt_file, patch_file]
1409 p = common.Run(cmd, stdout=subprocess.PIPE)
1410 p.communicate()
Tianjie Xu25366072017-09-08 17:19:02 -07001411 if p.returncode != 0:
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001412 print("Failed to create patch between {} and {},"
1413 " falling back to bsdiff".format(src_name, tgt_name))
1414 with transfer_lock:
1415 AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
1416 "diff", self.transfers)
1417 continue
Tianjie Xu25366072017-09-08 17:19:02 -07001418
1419 with open(patch_info_file) as patch_info:
1420 lines = patch_info.readlines()
1421
1422 patch_size_total = os.path.getsize(patch_file)
1423 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1424 tgt_ranges, src_ranges,
1425 lines)
1426 for index, (patch_start, patch_length, split_tgt_ranges,
1427 split_src_ranges) in enumerate(split_info_list):
1428 with open(patch_file) as f:
1429 f.seek(patch_start)
1430 patch_content = f.read(patch_length)
1431
1432 split_src_name = "{}-{}".format(src_name, index)
1433 split_tgt_name = "{}-{}".format(tgt_name, index)
1434 transfer_split = Transfer(split_tgt_name, split_src_name,
1435 split_tgt_ranges, split_src_ranges,
1436 self.tgt.RangeSha1(split_tgt_ranges),
1437 self.src.RangeSha1(split_src_ranges),
1438 "diff", self.transfers)
1439 transfer_split.patch = patch_content
1440
Tao Bao08c85832016-09-19 22:26:30 -07001441 print("Finding transfers...")
1442
Tianjie Xu25366072017-09-08 17:19:02 -07001443 large_apks = []
1444 cache_size = common.OPTIONS.cache_size
1445 split_threshold = 0.125
1446 max_blocks_per_transfer = int(cache_size * split_threshold /
1447 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001448 empty = RangeSet()
1449 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1450 if tgt_fn == "__ZERO":
1451 # the special "__ZERO" domain is all the blocks not contained
1452 # in any file and that are filled with zeros. We have a
1453 # special transfer style for zero blocks.
1454 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001455 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1456 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001457 continue
1458
Tao Baoff777812015-05-12 11:42:31 -07001459 elif tgt_fn == "__COPY":
1460 # "__COPY" domain includes all the blocks not contained in any
1461 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001462 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001463 continue
1464
Doug Zongkerfc44a512014-08-26 13:10:25 -07001465 elif tgt_fn in self.src.file_map:
1466 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001467 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001468 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001469 continue
1470
1471 b = os.path.basename(tgt_fn)
1472 if b in self.src_basenames:
1473 # Look for an exact basename match in the source.
1474 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001475 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001476 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001477 continue
1478
1479 b = re.sub("[0-9]+", "#", b)
1480 if b in self.src_numpatterns:
1481 # Look for a 'number pattern' match (a basename match after
1482 # all runs of digits are replaced by "#"). (This is useful
1483 # for .so files that contain version numbers in the filename
1484 # that get bumped.)
1485 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001486 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001487 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001488 continue
1489
Tao Bao9a5caf22015-08-25 15:10:10 -07001490 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001491
Tianjie Xu25366072017-09-08 17:19:02 -07001492 transfer_lock = threading.Lock()
1493 threads = [threading.Thread(target=AddSplitTransferForLargeApks)
1494 for _ in range(self.threads)]
1495 for th in threads:
1496 th.start()
1497 while threads:
1498 threads.pop().join()
1499
Doug Zongkerfc44a512014-08-26 13:10:25 -07001500 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001501 for k in self.src.file_map.keys():
1502 b = os.path.basename(k)
1503 self.src_basenames[b] = k
1504 b = re.sub("[0-9]+", "#", b)
1505 self.src_numpatterns[b] = k
1506
1507 @staticmethod
1508 def AssertPartition(total, seq):
1509 """Assert that all the RangeSets in 'seq' form a partition of the
1510 'total' RangeSet (ie, they are nonintersecting and their union
1511 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001512
Doug Zongkerfc44a512014-08-26 13:10:25 -07001513 so_far = RangeSet()
1514 for i in seq:
1515 assert not so_far.overlaps(i)
1516 so_far = so_far.union(i)
1517 assert so_far == total