blob: b1ad8b636aa6b3ccf798c89cfdb7573c8088e1cc [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Tianjie Xu25366072017-09-08 17:19:02 -070019import copy
Doug Zongker6ab2a502016-02-09 08:28:09 -080020import functools
Doug Zongker62338182014-09-08 08:29:55 -070021import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070022import itertools
23import multiprocessing
24import os
Tao Bao3a2e3502016-12-28 09:14:39 -080025import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080028import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070029import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070030
Tao Bao3a2e3502016-12-28 09:14:39 -080031from collections import deque, OrderedDict
32from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070033from rangelib import RangeSet
34
Doug Zongkerfc44a512014-08-26 13:10:25 -070035
# Public API of this module; everything else is an implementation detail.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
37
Dan Albert8b72aef2015-03-23 19:13:21 -070038
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Return a binary patch that transforms srcfile into tgtfile.

  Runs imgdiff (zip-aware, via '-z') when imgdiff is True, plain bsdiff
  otherwise, writing the result to a temp file and reading it back.

  Raises:
    ValueError: if the external diff tool exits with a non-zero status;
        the tool's combined stdout/stderr is used as the message.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface that BlockImageDiff requires of image objects."""

  def RangeSha1(self, ranges):
    """Return (as a hex string) the SHA-1 of the data in 'ranges'."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Return the data in 'ranges' as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return (as a hex string) the SHA-1 over the whole image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in 'ranges' to the file object 'fd'."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data, so every range hashes like the empty string.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap 'data' as an image.

    Args:
      data: raw image contents.
      trim: if True, drop a trailing partial block instead of failing.
      pad: if True, zero-pad a trailing partial block instead of failing.

    Raises:
      ValueError: if len(data) is not a multiple of the block size and
          neither trim nor pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # BUGFIX: use floor division so total_blocks stays an int under
    # Python 3 as well; plain "/" would yield a float there and break
    # the range() call below. (Identical result under Python 2.)
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded final block) as all-zero or
    # not; block lists are stored as flat [start, end, start, end, ...].
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yield the data chunk for each (start, end) block pair in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    # Returned as a single-element list holding a generator; callers only
    # need an iterable of chunks whose concatenation is the range data.
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # "Intact" only when both range sets are monotonic (data laid out in
    # order on disk).
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # OrderedDict rather than dict keeps the output repeatable; plain dict
    # iteration order would depend on the Transfer objects' hash values.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register with the global list; the index doubles as the transfer id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    return self._patch

  @patch.setter
  def patch(self, patch):
    # Only "diff"-style transfers may carry patch data.
    if patch:
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Return blocks this command stashes minus blocks it frees."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Rewrite this transfer as a "new" command reading no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
234
235
@functools.total_ordering
class HeapItem(object):
  """Adapter that turns python's min-heap into a max-heap over item.score."""

  def __init__(self, item):
    self.item = item
    # Store the negated score: heapq pops the smallest element first, and
    # we want the item with the largest original score.
    self.score = -item.score

  def clear(self):
    # Mark this entry as dead; cleared entries are falsy (see __bool__).
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 spells the truthiness hook __nonzero__ instead of __bool__.
  __nonzero__ = __bool__

  # functools.total_ordering fills in the remaining comparisons from
  # __eq__ plus __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
259
260
Doug Zongkerfc44a512014-08-26 13:10:25 -0700261# BlockImageDiff works on two image objects. An image object is
262# anything that provides the following attributes:
263#
264# blocksize: the size in bytes of a block, currently must be 4096.
265#
266# total_blocks: the total size of the partition/image, in blocks.
267#
268# care_map: a RangeSet containing which blocks (in the range [0,
269# total_blocks) we actually care about; i.e. which blocks contain
270# data.
271#
272# file_map: a dict that partitions the blocks contained in care_map
273# into smaller domains that are useful for doing diffs on.
274# (Typically a domain is a file, and the key in file_map is the
275# pathname.)
276#
Tao Baoff777812015-05-12 11:42:31 -0700277# clobbered_blocks: a RangeSet containing which blocks contain data
278# but may be altered by the FS. They need to be excluded when
279# verifying the partition integrity.
280#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281# ReadRangeSet(): a function that takes a RangeSet and returns the
282# data contained in the image blocks of that RangeSet. The data
283# is returned as a list or tuple of strings; concatenating the
284# elements together should produce the requested data.
285# Implementations are free to break up the data into list/tuple
286# elements in any way that is convenient.
287#
Tao Bao183e56e2017-03-05 17:05:09 -0800288# RangeSha1(): a function that returns (as a hex string) the SHA-1
289# hash of all the data in the specified range.
290#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700291# TotalSha1(): a function that returns (as a hex string) the SHA-1
292# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700293# care_map minus clobbered_blocks, or including the clobbered
294# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700295#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700296# When creating a BlockImageDiff, the src image may be None, in which
297# case the list of transfers produced will never read from the
298# original image.
299
300class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700301 def __init__(self, tgt, src=None, threads=None, version=4,
302 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700303 if threads is None:
304 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700305 if threads == 0:
306 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700307 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700308 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700309 self.transfers = []
310 self.src_basenames = {}
311 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800312 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700313 self.touched_src_ranges = RangeSet()
314 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700315 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700316
Tao Bao8fad03e2017-03-01 14:36:26 -0800317 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700318
319 self.tgt = tgt
320 if src is None:
321 src = EmptyImage()
322 self.src = src
323
324 # The updater code that installs the patch always uses 4k blocks.
325 assert tgt.blocksize == 4096
326 assert src.blocksize == 4096
327
328 # The range sets in each filemap should comprise a partition of
329 # the care map.
330 self.AssertPartition(src.care_map, src.file_map.values())
331 self.AssertPartition(tgt.care_map, tgt.file_map.values())
332
Tao Baob4cfca52016-02-04 14:26:02 -0800333 @property
334 def max_stashed_size(self):
335 return self._max_stashed_size
336
Doug Zongkerfc44a512014-08-26 13:10:25 -0700337 def Compute(self, prefix):
338 # When looking for a source file to use as the diff input for a
339 # target file, we try:
340 # 1) an exact path match if available, otherwise
341 # 2) a exact basename match if available, otherwise
342 # 3) a basename match after all runs of digits are replaced by
343 # "#" if available, otherwise
344 # 4) we have no source for this target.
345 self.AbbreviateSourceNames()
346 self.FindTransfers()
347
348 # Find the ordering dependencies among transfers (this is O(n^2)
349 # in the number of transfers).
350 self.GenerateDigraph()
351 # Find a sequence of transfers that satisfies as many ordering
352 # dependencies as possible (heuristically).
353 self.FindVertexSequence()
354 # Fix up the ordering dependencies that the sequence didn't
355 # satisfy.
Tao Bao8fad03e2017-03-01 14:36:26 -0800356 self.ReverseBackwardEdges()
357 self.ImproveVertexSequence()
Doug Zongker62338182014-09-08 08:29:55 -0700358
Tao Bao82c47982015-08-17 09:45:13 -0700359 # Ensure the runtime stash size is under the limit.
Tao Bao8fad03e2017-03-01 14:36:26 -0800360 if common.OPTIONS.cache_size is not None:
Tao Bao82c47982015-08-17 09:45:13 -0700361 self.ReviseStashSize()
362
Doug Zongkerfc44a512014-08-26 13:10:25 -0700363 # Double-check our work.
364 self.AssertSequenceGood()
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800365 self.AssertSha1Good()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700366
367 self.ComputePatches(prefix)
368 self.WriteTransfers(prefix)
369
370 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700371 def WriteSplitTransfers(out, style, target_blocks):
372 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700373
374 This prevents the target size of one command from being too large; and
375 might help to avoid fsync errors on some devices."""
376
Tao Bao3a2e3502016-12-28 09:14:39 -0800377 assert style == "new" or style == "zero"
Tianjie Xu37e29302016-06-23 16:10:35 -0700378 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700379 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700380 while target_blocks:
381 blocks_to_write = target_blocks.first(blocks_limit)
382 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
383 total += blocks_to_write.size()
384 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700385 return total
386
Doug Zongkerfc44a512014-08-26 13:10:25 -0700387 out = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700388 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700389
Tao Bao3a2e3502016-12-28 09:14:39 -0800390 # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
391 # id. 'stashes' records the map from 'hash' to the ref count. The stash
392 # will be freed only if the count decrements to zero.
Doug Zongker62338182014-09-08 08:29:55 -0700393 stashes = {}
394 stashed_blocks = 0
395 max_stashed_blocks = 0
396
Doug Zongkerfc44a512014-08-26 13:10:25 -0700397 for xf in self.transfers:
398
Tao Bao8fad03e2017-03-01 14:36:26 -0800399 for _, sr in xf.stash_before:
400 sh = self.src.RangeSha1(sr)
401 if sh in stashes:
402 stashes[sh] += 1
Sami Tolvanendd67a292014-12-09 16:40:34 +0000403 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800404 stashes[sh] = 1
405 stashed_blocks += sr.size()
406 self.touched_src_ranges = self.touched_src_ranges.union(sr)
407 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700408
409 if stashed_blocks > max_stashed_blocks:
410 max_stashed_blocks = stashed_blocks
411
Jesse Zhao7b985f62015-03-02 16:53:08 -0800412 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800413 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800414
Tao Bao8fad03e2017-03-01 14:36:26 -0800415 # <# blocks> <src ranges>
416 # OR
417 # <# blocks> <src ranges> <src locs> <stash refs...>
418 # OR
419 # <# blocks> - <stash refs...>
Doug Zongker62338182014-09-08 08:29:55 -0700420
Tao Bao8fad03e2017-03-01 14:36:26 -0800421 size = xf.src_ranges.size()
422 src_str = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700423
Tao Bao8fad03e2017-03-01 14:36:26 -0800424 unstashed_src_ranges = xf.src_ranges
425 mapped_stashes = []
426 for _, sr in xf.use_stash:
427 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
428 sh = self.src.RangeSha1(sr)
429 sr = xf.src_ranges.map_within(sr)
430 mapped_stashes.append(sr)
431 assert sh in stashes
432 src_str.append("%s:%s" % (sh, sr.to_string_raw()))
433 stashes[sh] -= 1
434 if stashes[sh] == 0:
435 free_string.append("free %s\n" % (sh,))
436 free_size += sr.size()
437 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700438
Tao Bao8fad03e2017-03-01 14:36:26 -0800439 if unstashed_src_ranges:
440 src_str.insert(1, unstashed_src_ranges.to_string_raw())
441 if xf.use_stash:
442 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
443 src_str.insert(2, mapped_unstashed.to_string_raw())
444 mapped_stashes.append(mapped_unstashed)
Doug Zongker62338182014-09-08 08:29:55 -0700445 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Tao Bao8fad03e2017-03-01 14:36:26 -0800446 else:
447 src_str.insert(1, "-")
448 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Doug Zongker62338182014-09-08 08:29:55 -0700449
Tao Bao8fad03e2017-03-01 14:36:26 -0800450 src_str = " ".join(src_str)
Doug Zongker62338182014-09-08 08:29:55 -0700451
Tao Bao8fad03e2017-03-01 14:36:26 -0800452 # version 3+:
Doug Zongker62338182014-09-08 08:29:55 -0700453 # zero <rangeset>
454 # new <rangeset>
455 # erase <rangeset>
Dan Albert8b72aef2015-03-23 19:13:21 -0700456 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
457 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
458 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700459
460 tgt_size = xf.tgt_ranges.size()
461
462 if xf.style == "new":
463 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700464 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700465 total += tgt_size
466 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700467 assert xf.tgt_ranges
468 assert xf.src_ranges.size() == tgt_size
469 if xf.src_ranges != xf.tgt_ranges:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100470 # take into account automatic stashing of overlapping blocks
471 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700472 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100473 if temp_stash_usage > max_stashed_blocks:
474 max_stashed_blocks = temp_stash_usage
475
Tao Baod522bdc2016-04-12 15:53:16 -0700476 self.touched_src_ranges = self.touched_src_ranges.union(
477 xf.src_ranges)
478
Tao Bao8fad03e2017-03-01 14:36:26 -0800479 out.append("%s %s %s %s\n" % (
Sami Tolvanendd67a292014-12-09 16:40:34 +0000480 xf.style,
Tao Bao183e56e2017-03-05 17:05:09 -0800481 xf.tgt_sha1,
Dan Albert8b72aef2015-03-23 19:13:21 -0700482 xf.tgt_ranges.to_string_raw(), src_str))
Tao Bao8fad03e2017-03-01 14:36:26 -0800483 total += tgt_size
484 elif xf.style in ("bsdiff", "imgdiff"):
485 assert xf.tgt_ranges
486 assert xf.src_ranges
487 # take into account automatic stashing of overlapping blocks
488 if xf.src_ranges.overlaps(xf.tgt_ranges):
489 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
490 if temp_stash_usage > max_stashed_blocks:
491 max_stashed_blocks = temp_stash_usage
492
493 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
494
495 out.append("%s %d %d %s %s %s %s\n" % (
496 xf.style,
497 xf.patch_start, xf.patch_len,
498 xf.src_sha1,
499 xf.tgt_sha1,
500 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700501 total += tgt_size
502 elif xf.style == "zero":
503 assert xf.tgt_ranges
504 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700505 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700506 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700507 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700508 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700509
Sami Tolvanendd67a292014-12-09 16:40:34 +0000510 if free_string:
511 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800512 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000513
Tao Bao8fad03e2017-03-01 14:36:26 -0800514 if common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700515 # Sanity check: abort if we're going to need more stash space than
516 # the allowed size (cache_size * threshold). There are two purposes
517 # of having a threshold here. a) Part of the cache may have been
518 # occupied by some recovery logs. b) It will buy us some time to deal
519 # with the oversize issue.
520 cache_size = common.OPTIONS.cache_size
521 stash_threshold = common.OPTIONS.stash_threshold
522 max_allowed = cache_size * stash_threshold
Tao Baoe8c68a02017-02-26 10:48:11 -0800523 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
Tao Bao8dcf7382015-05-21 14:09:49 -0700524 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
525 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
526 self.tgt.blocksize, max_allowed, cache_size,
527 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700528
Tao Bao8fad03e2017-03-01 14:36:26 -0800529 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
Tao Baod522bdc2016-04-12 15:53:16 -0700530
Tao Baoe9b61912015-07-09 17:37:49 -0700531 # Zero out extended blocks as a workaround for bug 20881595.
532 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700533 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700534 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700535 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700536
537 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700538 # data in the new image; b) will not be touched by dm-verity. Out of those
539 # blocks, we erase the ones that won't be used in this update at the
540 # beginning of an update. The rest would be erased at the end. This is to
541 # work around the eMMC issue observed on some devices, which may otherwise
542 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700543 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700544 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
545 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700546
547 erase_first = new_dontcare.subtract(self.touched_src_ranges)
548 if erase_first:
549 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
550
551 erase_last = new_dontcare.subtract(erase_first)
552 if erase_last:
553 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700554
555 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700556 out.insert(1, "%d\n" % (total,))
Tao Bao8fad03e2017-03-01 14:36:26 -0800557 # v3+: the number of stash slots is unused.
558 out.insert(2, "0\n")
559 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700560
561 with open(prefix + ".transfer.list", "wb") as f:
562 for i in out:
563 f.write(i)
564
Tao Bao8fad03e2017-03-01 14:36:26 -0800565 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
566 OPTIONS = common.OPTIONS
567 if OPTIONS.cache_size is not None:
568 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
569 print("max stashed blocks: %d (%d bytes), "
570 "limit: %d bytes (%.2f%%)\n" % (
571 max_stashed_blocks, self._max_stashed_size, max_allowed,
572 self._max_stashed_size * 100.0 / max_allowed))
573 else:
574 print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
575 max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700576
Tao Bao82c47982015-08-17 09:45:13 -0700577 def ReviseStashSize(self):
578 print("Revising stash size...")
Tao Baoe27acfd2016-12-16 11:13:55 -0800579 stash_map = {}
Tao Bao82c47982015-08-17 09:45:13 -0700580
581 # Create the map between a stash and its def/use points. For example, for a
Tao Bao3c5a16d2017-02-13 11:42:50 -0800582 # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
Tao Bao82c47982015-08-17 09:45:13 -0700583 for xf in self.transfers:
584 # Command xf defines (stores) all the stashes in stash_before.
Tao Bao3a2e3502016-12-28 09:14:39 -0800585 for stash_raw_id, sr in xf.stash_before:
586 stash_map[stash_raw_id] = (sr, xf)
Tao Bao82c47982015-08-17 09:45:13 -0700587
588 # Record all the stashes command xf uses.
Tao Bao3a2e3502016-12-28 09:14:39 -0800589 for stash_raw_id, _ in xf.use_stash:
590 stash_map[stash_raw_id] += (xf,)
Tao Bao82c47982015-08-17 09:45:13 -0700591
592 # Compute the maximum blocks available for stash based on /cache size and
593 # the threshold.
594 cache_size = common.OPTIONS.cache_size
595 stash_threshold = common.OPTIONS.stash_threshold
596 max_allowed = cache_size * stash_threshold / self.tgt.blocksize
597
Tao Bao3a2e3502016-12-28 09:14:39 -0800598 # See the comments for 'stashes' in WriteTransfers().
Tao Baoe27acfd2016-12-16 11:13:55 -0800599 stashes = {}
Tao Bao82c47982015-08-17 09:45:13 -0700600 stashed_blocks = 0
Tao Bao9a5caf22015-08-25 15:10:10 -0700601 new_blocks = 0
Tao Bao82c47982015-08-17 09:45:13 -0700602
603 # Now go through all the commands. Compute the required stash size on the
604 # fly. If a command requires excess stash than available, it deletes the
605 # stash by replacing the command that uses the stash with a "new" command
606 # instead.
607 for xf in self.transfers:
608 replaced_cmds = []
609
610 # xf.stash_before generates explicit stash commands.
Tao Bao3a2e3502016-12-28 09:14:39 -0800611 for stash_raw_id, sr in xf.stash_before:
Tao Baoe27acfd2016-12-16 11:13:55 -0800612 # Check the post-command stashed_blocks.
613 stashed_blocks_after = stashed_blocks
Tao Bao8fad03e2017-03-01 14:36:26 -0800614 sh = self.src.RangeSha1(sr)
615 if sh not in stashes:
Tao Baoe27acfd2016-12-16 11:13:55 -0800616 stashed_blocks_after += sr.size()
Tao Baoe27acfd2016-12-16 11:13:55 -0800617
618 if stashed_blocks_after > max_allowed:
Tao Bao82c47982015-08-17 09:45:13 -0700619 # We cannot stash this one for a later command. Find out the command
620 # that will use this stash and replace the command with "new".
Tao Bao3a2e3502016-12-28 09:14:39 -0800621 use_cmd = stash_map[stash_raw_id][2]
Tao Bao82c47982015-08-17 09:45:13 -0700622 replaced_cmds.append(use_cmd)
Tao Bao9a5caf22015-08-25 15:10:10 -0700623 print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
Tao Bao82c47982015-08-17 09:45:13 -0700624 else:
Tao Bao3c5a16d2017-02-13 11:42:50 -0800625 # Update the stashes map.
Tao Bao8fad03e2017-03-01 14:36:26 -0800626 if sh in stashes:
627 stashes[sh] += 1
Tao Bao3c5a16d2017-02-13 11:42:50 -0800628 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800629 stashes[sh] = 1
Tao Baoe27acfd2016-12-16 11:13:55 -0800630 stashed_blocks = stashed_blocks_after
Tao Bao82c47982015-08-17 09:45:13 -0700631
632 # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
633 # ComputePatches(), they both have the style of "diff".
Tao Bao8fad03e2017-03-01 14:36:26 -0800634 if xf.style == "diff":
Tao Bao82c47982015-08-17 09:45:13 -0700635 assert xf.tgt_ranges and xf.src_ranges
636 if xf.src_ranges.overlaps(xf.tgt_ranges):
637 if stashed_blocks + xf.src_ranges.size() > max_allowed:
638 replaced_cmds.append(xf)
Tao Bao9a5caf22015-08-25 15:10:10 -0700639 print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))
Tao Bao82c47982015-08-17 09:45:13 -0700640
641 # Replace the commands in replaced_cmds with "new"s.
642 for cmd in replaced_cmds:
643 # It no longer uses any commands in "use_stash". Remove the def points
644 # for all those stashes.
Tao Bao3a2e3502016-12-28 09:14:39 -0800645 for stash_raw_id, sr in cmd.use_stash:
646 def_cmd = stash_map[stash_raw_id][1]
647 assert (stash_raw_id, sr) in def_cmd.stash_before
648 def_cmd.stash_before.remove((stash_raw_id, sr))
Tao Bao82c47982015-08-17 09:45:13 -0700649
Tianjie Xuebe39a02016-01-14 14:12:26 -0800650 # Add up blocks that violates space limit and print total number to
651 # screen later.
652 new_blocks += cmd.tgt_ranges.size()
Tao Bao82c47982015-08-17 09:45:13 -0700653 cmd.ConvertToNew()
654
Tao Bao3a2e3502016-12-28 09:14:39 -0800655 # xf.use_stash may generate free commands.
Tao Bao8fad03e2017-03-01 14:36:26 -0800656 for _, sr in xf.use_stash:
657 sh = self.src.RangeSha1(sr)
658 assert sh in stashes
659 stashes[sh] -= 1
660 if stashes[sh] == 0:
Tao Baoe27acfd2016-12-16 11:13:55 -0800661 stashed_blocks -= sr.size()
Tao Bao8fad03e2017-03-01 14:36:26 -0800662 stashes.pop(sh)
Tao Baoe27acfd2016-12-16 11:13:55 -0800663
Tianjie Xuebe39a02016-01-14 14:12:26 -0800664 num_of_bytes = new_blocks * self.tgt.blocksize
665 print(" Total %d blocks (%d bytes) are packed as new blocks due to "
666 "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao304ee272016-12-19 11:01:38 -0800667 return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700668
Doug Zongkerfc44a512014-08-26 13:10:25 -0700669 def ComputePatches(self, prefix):
670 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800671 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700672 patch_num = 0
673 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800674 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700675 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700676 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
677 print("%10d %10d (%6.2f%%) %7s %s %s" % (
678 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
679 str(xf.tgt_ranges)))
680
Doug Zongkerfc44a512014-08-26 13:10:25 -0700681 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800682 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700683 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
684 print("%10d %10d (%6.2f%%) %7s %s %s" % (
685 tgt_size, tgt_size, 100.0, xf.style,
686 xf.tgt_name, str(xf.tgt_ranges)))
687
Doug Zongkerfc44a512014-08-26 13:10:25 -0700688 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700689 # We can't compare src and tgt directly because they may have
690 # the same content but be broken up into blocks differently, eg:
691 #
692 # ["he", "llo"] vs ["h", "ello"]
693 #
694 # We want those to compare equal, ideally without having to
695 # actually concatenate the strings (these may be tens of
696 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800697 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700698 # These are identical; we don't need to generate a patch,
699 # just issue copy commands on the device.
700 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700701 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800702 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700703 if xf.src_ranges != xf.tgt_ranges:
704 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
705 tgt_size, tgt_size, 100.0, xf.style,
706 xf.tgt_name if xf.tgt_name == xf.src_name else (
707 xf.tgt_name + " (from " + xf.src_name + ")"),
708 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700709 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700710 if xf.patch:
711 # We have already generated the patch with imgdiff. Check if the
712 # transfer is intact.
713 assert not self.disable_imgdiff
714 imgdiff = True
715 if not xf.intact:
716 imgdiff = False
717 xf.patch = None
718 else:
719 # For files in zip format (eg, APKs, JARs, etc.) we would
720 # like to use imgdiff -z if possible (because it usually
721 # produces significantly smaller patches than bsdiff).
722 # This is permissible if:
723 #
724 # - imgdiff is not disabled, and
725 # - the source and target files are monotonic (ie, the
726 # data is stored with blocks in increasing order), and
727 # - we haven't removed any blocks from the source set.
728 #
729 # If these conditions are satisfied then appending all the
730 # blocks in the set together in order will produce a valid
731 # zip file (plus possibly extra zeros in the last block),
732 # which is what imgdiff needs to operate. (imgdiff is
733 # fine with extra zeros at the end of the file.)
734 imgdiff = (not self.disable_imgdiff and xf.intact and
735 xf.tgt_name.split(".")[-1].lower()
736 in ("apk", "jar", "zip"))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700737 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800738 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700739 patch_num += 1
740
741 else:
742 assert False, "unknown style " + xf.style
743
Tao Bao183e56e2017-03-05 17:05:09 -0800744 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700745 if self.threads > 1:
746 print("Computing patches (using %d threads)..." % (self.threads,))
747 else:
748 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700749
Tao Bao183e56e2017-03-05 17:05:09 -0800750 diff_total = len(diff_queue)
751 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700752 error_messages = []
Tao Baob937ead2017-10-19 16:51:53 -0700753 warning_messages = []
Tao Bao33635b12017-03-12 13:02:51 -0700754 if sys.stdout.isatty():
755 global diff_done
756 diff_done = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700757
Tao Bao183e56e2017-03-05 17:05:09 -0800758 # Using multiprocessing doesn't give additional benefits, due to the
759 # pattern of the code. The diffing work is done by subprocess.call, which
760 # already runs in a separate process (not affected much by the GIL -
761 # Global Interpreter Lock). Using multiprocess also requires either a)
762 # writing the diff input files in the main process before forking, or b)
763 # reopening the image file (SparseImage) in the worker processes. Doing
764 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700765 lock = threading.Lock()
766 def diff_worker():
767 while True:
768 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800769 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700770 return
Tao Bao183e56e2017-03-05 17:05:09 -0800771 xf_index, imgdiff, patch_index = diff_queue.pop()
772
773 xf = self.transfers[xf_index]
Tianjie Xu25366072017-09-08 17:19:02 -0700774 patch = xf.patch
775 if not patch:
776 src_ranges = xf.src_ranges
777 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800778
Tianjie Xudf1166e2018-01-27 17:35:41 -0800779 src_file = common.MakeTempFile(prefix="src-")
780 with open(src_file, "wb") as fd:
781 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700782
Tianjie Xudf1166e2018-01-27 17:35:41 -0800783 tgt_file = common.MakeTempFile(prefix="tgt-")
784 with open(tgt_file, "wb") as fd:
785 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700786
787 message = []
788 try:
789 patch = compute_patch(src_file, tgt_file, imgdiff)
790 except ValueError as e:
791 message.append(
792 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
793 "imgdiff" if imgdiff else "bsdiff",
794 xf.tgt_name if xf.tgt_name == xf.src_name else
795 xf.tgt_name + " (from " + xf.src_name + ")",
796 xf.tgt_ranges, xf.src_ranges, e.message))
797 # TODO(b/68016761): Better handle the holes in mke2fs created
798 # images.
799 if imgdiff:
800 try:
801 patch = compute_patch(src_file, tgt_file, imgdiff=False)
802 message.append(
803 "Fell back and generated with bsdiff instead for %s" % (
804 xf.tgt_name,))
805 xf.style = "bsdiff"
806 with lock:
807 warning_messages.extend(message)
808 del message[:]
809 except ValueError as e:
810 message.append(
811 "Also failed to generate with bsdiff for %s:\n%s" % (
812 xf.tgt_name, e.message))
813
814 if message:
815 with lock:
816 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800817
818 with lock:
819 patches[patch_index] = (xf_index, patch)
820 if sys.stdout.isatty():
Tao Bao33635b12017-03-12 13:02:51 -0700821 global diff_done
822 diff_done += 1
823 progress = diff_done * 100 / diff_total
Tao Bao183e56e2017-03-05 17:05:09 -0800824 # '\033[K' is to clear to EOL.
825 print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
826 sys.stdout.flush()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700827
828 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700829 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700830 for th in threads:
831 th.start()
832 while threads:
833 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800834
835 if sys.stdout.isatty():
836 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700837
Tao Baob937ead2017-10-19 16:51:53 -0700838 if warning_messages:
839 print('WARNING:')
840 print('\n'.join(warning_messages))
841 print('\n\n\n')
842
Tianjie Xub59c17f2016-10-28 17:55:53 -0700843 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700844 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700845 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700846 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700847 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700848 else:
849 patches = []
850
Tao Bao183e56e2017-03-05 17:05:09 -0800851 offset = 0
852 with open(prefix + ".patch.dat", "wb") as patch_fd:
853 for index, patch in patches:
854 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700855 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800856 xf.patch_start = offset
857 offset += xf.patch_len
858 patch_fd.write(patch)
859
860 if common.OPTIONS.verbose:
861 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
862 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
863 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
864 xf.style,
865 xf.tgt_name if xf.tgt_name == xf.src_name else (
866 xf.tgt_name + " (from " + xf.src_name + ")"),
867 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700868
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800869 def AssertSha1Good(self):
870 """Check the SHA-1 of the src & tgt blocks in the transfer list.
871
872 Double check the SHA-1 value to avoid the issue in b/71908713, where
873 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
874 environment. That specific problem has been fixed by protecting the
875 underlying generator function 'SparseImage._GetRangeData()' with lock.
876 """
877 for xf in self.transfers:
878 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
879 assert xf.tgt_sha1 == tgt_sha1
880 if xf.style == "diff":
881 src_sha1 = self.src.RangeSha1(xf.src_ranges)
882 assert xf.src_sha1 == src_sha1
883
Doug Zongkerfc44a512014-08-26 13:10:25 -0700884 def AssertSequenceGood(self):
885 # Simulate the sequences of transfers we will output, and check that:
886 # - we never read a block after writing it, and
887 # - we write every block we care about exactly once.
888
889 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800890 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700891
892 # Imagine processing the transfers in order.
893 for xf in self.transfers:
894 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700895
896 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800897 for _, sr in xf.use_stash:
898 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700899
Doug Zongker6ab2a502016-02-09 08:28:09 -0800900 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800901 # Source image could be larger. Don't check the blocks that are in the
902 # source image only. Since they are not in 'touched', and won't ever
903 # be touched.
904 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800905 assert touched[i] == 0
906
907 # Check that the output blocks for this transfer haven't yet
908 # been touched, and touch all the blocks written by this
909 # transfer.
910 for s, e in xf.tgt_ranges:
911 for i in range(s, e):
912 assert touched[i] == 0
913 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700914
915 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800916 for s, e in self.tgt.care_map:
917 for i in range(s, e):
918 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700919
Doug Zongker62338182014-09-08 08:29:55 -0700920 def ImproveVertexSequence(self):
921 print("Improving vertex order...")
922
923 # At this point our digraph is acyclic; we reversed any edges that
924 # were backwards in the heuristically-generated sequence. The
925 # previously-generated order is still acceptable, but we hope to
926 # find a better order that needs less memory for stashed data.
927 # Now we do a topological sort to generate a new vertex order,
928 # using a greedy algorithm to choose which vertex goes next
929 # whenever we have a choice.
930
931 # Make a copy of the edge set; this copy will get destroyed by the
932 # algorithm.
933 for xf in self.transfers:
934 xf.incoming = xf.goes_after.copy()
935 xf.outgoing = xf.goes_before.copy()
936
937 L = [] # the new vertex order
938
939 # S is the set of sources in the remaining graph; we always choose
940 # the one that leaves the least amount of stashed data after it's
941 # executed.
942 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
943 if not u.incoming]
944 heapq.heapify(S)
945
946 while S:
947 _, _, xf = heapq.heappop(S)
948 L.append(xf)
949 for u in xf.outgoing:
950 del u.incoming[xf]
951 if not u.incoming:
952 heapq.heappush(S, (u.NetStashChange(), u.order, u))
953
954 # if this fails then our graph had a cycle.
955 assert len(L) == len(self.transfers)
956
957 self.transfers = L
958 for i, xf in enumerate(L):
959 xf.order = i
960
Doug Zongkerfc44a512014-08-26 13:10:25 -0700961 def RemoveBackwardEdges(self):
962 print("Removing backward edges...")
963 in_order = 0
964 out_of_order = 0
965 lost_source = 0
966
967 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700968 lost = 0
969 size = xf.src_ranges.size()
970 for u in xf.goes_before:
971 # xf should go before u
972 if xf.order < u.order:
973 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700974 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700975 else:
976 # it doesn't, boo. trim the blocks that u writes from xf's
977 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700978 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700979 assert xf.src_ranges.overlaps(u.tgt_ranges)
980 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
981 xf.intact = False
982
983 if xf.style == "diff" and not xf.src_ranges:
984 # nothing left to diff from; treat as new data
985 xf.style = "new"
986
987 lost = size - xf.src_ranges.size()
988 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700989
990 print((" %d/%d dependencies (%.2f%%) were violated; "
991 "%d source blocks removed.") %
992 (out_of_order, in_order + out_of_order,
993 (out_of_order * 100.0 / (in_order + out_of_order))
994 if (in_order + out_of_order) else 0.0,
995 lost_source))
996
Doug Zongker62338182014-09-08 08:29:55 -0700997 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800998 """Reverse unsatisfying edges and compute pairs of stashed blocks.
999
1000 For each transfer, make sure it properly stashes the blocks it touches and
1001 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1002 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1003 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1004 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1005 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1006 blocks will be written to the same stash slot in WriteTransfers().
1007 """
1008
Doug Zongker62338182014-09-08 08:29:55 -07001009 print("Reversing backward edges...")
1010 in_order = 0
1011 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001012 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001013 stash_size = 0
1014
1015 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001016 for u in xf.goes_before.copy():
1017 # xf should go before u
1018 if xf.order < u.order:
1019 # it does, hurray!
1020 in_order += 1
1021 else:
1022 # it doesn't, boo. modify u to stash the blocks that it
1023 # writes that xf wants to read, and then require u to go
1024 # before xf.
1025 out_of_order += 1
1026
1027 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1028 assert overlap
1029
Tao Bao3a2e3502016-12-28 09:14:39 -08001030 u.stash_before.append((stash_raw_id, overlap))
1031 xf.use_stash.append((stash_raw_id, overlap))
1032 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001033 stash_size += overlap.size()
1034
1035 # reverse the edge direction; now xf must go after u
1036 del xf.goes_before[u]
1037 del u.goes_after[xf]
1038 xf.goes_after[u] = None # value doesn't matter
1039 u.goes_before[xf] = None
1040
1041 print((" %d/%d dependencies (%.2f%%) were violated; "
1042 "%d source blocks stashed.") %
1043 (out_of_order, in_order + out_of_order,
1044 (out_of_order * 100.0 / (in_order + out_of_order))
1045 if (in_order + out_of_order) else 0.0,
1046 stash_size))
1047
Doug Zongkerfc44a512014-08-26 13:10:25 -07001048 def FindVertexSequence(self):
1049 print("Finding vertex sequence...")
1050
1051 # This is based on "A Fast & Effective Heuristic for the Feedback
1052 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1053 # it as starting with the digraph G and moving all the vertices to
1054 # be on a horizontal line in some order, trying to minimize the
1055 # number of edges that end up pointing to the left. Left-pointing
1056 # edges will get removed to turn the digraph into a DAG. In this
1057 # case each edge has a weight which is the number of source blocks
1058 # we'll lose if that edge is removed; we try to minimize the total
1059 # weight rather than just the number of edges.
1060
1061 # Make a copy of the edge set; this copy will get destroyed by the
1062 # algorithm.
1063 for xf in self.transfers:
1064 xf.incoming = xf.goes_after.copy()
1065 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001066 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001067
1068 # We use an OrderedDict instead of just a set so that the output
1069 # is repeatable; otherwise it would depend on the hash values of
1070 # the transfer objects.
1071 G = OrderedDict()
1072 for xf in self.transfers:
1073 G[xf] = None
1074 s1 = deque() # the left side of the sequence, built from left to right
1075 s2 = deque() # the right side of the sequence, built from right to left
1076
Doug Zongker6ab2a502016-02-09 08:28:09 -08001077 heap = []
1078 for xf in self.transfers:
1079 xf.heap_item = HeapItem(xf)
1080 heap.append(xf.heap_item)
1081 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001082
Tao Bao33482282016-10-24 16:49:08 -07001083 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1084 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1085 # { key1: None, key2: None, ... }.
1086 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1087 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001088
1089 def adjust_score(iu, delta):
1090 iu.score += delta
1091 iu.heap_item.clear()
1092 iu.heap_item = HeapItem(iu)
1093 heapq.heappush(heap, iu.heap_item)
1094
1095 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001096 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001097 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001098 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001099 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001100 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001101 s2.appendleft(u)
1102 del G[u]
1103 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001104 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001105 if not iu.outgoing:
1106 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001107 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001108
1109 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001110 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001111 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001112 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001113 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001114 s1.append(u)
1115 del G[u]
1116 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001117 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001118 if not iu.incoming:
1119 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001120 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001121
Doug Zongker6ab2a502016-02-09 08:28:09 -08001122 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001123
1124 # Find the "best" vertex to put next. "Best" is the one that
1125 # maximizes the net difference in source blocks saved we get by
1126 # pretending it's a source rather than a sink.
1127
Doug Zongker6ab2a502016-02-09 08:28:09 -08001128 while True:
1129 u = heapq.heappop(heap)
1130 if u and u.item in G:
1131 u = u.item
1132 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001133
Doug Zongkerfc44a512014-08-26 13:10:25 -07001134 s1.append(u)
1135 del G[u]
1136 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001137 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001138 if not iu.incoming:
1139 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001140
Doug Zongkerfc44a512014-08-26 13:10:25 -07001141 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001142 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001143 if not iu.outgoing:
1144 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001145
1146 # Now record the sequence in the 'order' field of each transfer,
1147 # and by rearranging self.transfers to be in the chosen sequence.
1148
1149 new_transfers = []
1150 for x in itertools.chain(s1, s2):
1151 x.order = len(new_transfers)
1152 new_transfers.append(x)
1153 del x.incoming
1154 del x.outgoing
1155
1156 self.transfers = new_transfers
1157
1158 def GenerateDigraph(self):
1159 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001160
1161 # Each item of source_ranges will be:
1162 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001163 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001164 source_ranges = []
1165 for b in self.transfers:
1166 for s, e in b.src_ranges:
1167 if e > len(source_ranges):
1168 source_ranges.extend([None] * (e-len(source_ranges)))
1169 for i in range(s, e):
1170 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001171 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001172 else:
Tao Bao33482282016-10-24 16:49:08 -07001173 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001174
Doug Zongkerfc44a512014-08-26 13:10:25 -07001175 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001176 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001177 for s, e in a.tgt_ranges:
1178 for i in range(s, e):
1179 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001180 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1181 if source_ranges[i] is not None:
1182 for j in source_ranges[i]:
1183 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001184
1185 for b in intersections:
1186 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001187
1188 # If the blocks written by A are read by B, then B needs to go before A.
1189 i = a.tgt_ranges.intersect(b.src_ranges)
1190 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001191 if b.src_name == "__ZERO":
1192 # the cost of removing source blocks for the __ZERO domain
1193 # is (nearly) zero.
1194 size = 0
1195 else:
1196 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001197 b.goes_before[a] = size
1198 a.goes_after[b] = size
1199
1200 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001201 """Parse the file_map to generate all the transfers."""
1202
Tianjie Xu41390d42017-11-22 11:35:18 -08001203 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1204 src_ranges, style, by_id):
1205 """Add one or multiple Transfer()s by splitting large files.
1206
1207 For BBOTA v3, we need to stash source blocks for resumable feature.
1208 However, with the growth of file size and the shrink of the cache
1209 partition source blocks are too large to be stashed. If a file occupies
1210 too many blocks, we split it into smaller pieces by getting multiple
1211 Transfer()s.
1212
1213 The downside is that after splitting, we may increase the package size
1214 since the split pieces don't align well. According to our experiments,
1215 1/8 of the cache size as the per-piece limit appears to be optimal.
1216 Compared to the fixed 1024-block limit, it reduces the overall package
1217 size by 30% for volantis, and 20% for angler and bullhead."""
1218
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001219 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001220 while (tgt_ranges.size() > max_blocks_per_transfer and
1221 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001222 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1223 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001224 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1225 src_first = src_ranges.first(max_blocks_per_transfer)
1226
Tao Bao183e56e2017-03-05 17:05:09 -08001227 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1228 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1229 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001230
1231 tgt_ranges = tgt_ranges.subtract(tgt_first)
1232 src_ranges = src_ranges.subtract(src_first)
1233 pieces += 1
1234
1235 # Handle remaining blocks.
1236 if tgt_ranges.size() or src_ranges.size():
1237 # Must be both non-empty.
1238 assert tgt_ranges.size() and src_ranges.size()
1239 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1240 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001241 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1242 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1243 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001244
Tianjie Xu41390d42017-11-22 11:35:18 -08001245 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1246 by_id):
1247 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001248
Tianjie Xu41390d42017-11-22 11:35:18 -08001249 This function will construct a list of zip archives, which will later be
1250 split by imgdiff to reduce the final patch size. For the other files,
1251 we will plainly split them based on a fixed chunk size with the potential
1252 patch size penalty.
1253 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001254
1255 assert style == "diff"
1256
1257 # Change nothing for small files.
1258 if (tgt_ranges.size() <= max_blocks_per_transfer and
1259 src_ranges.size() <= max_blocks_per_transfer):
1260 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1261 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1262 style, by_id)
1263 return
1264
1265 if tgt_name.split(".")[-1].lower() in ("apk", "jar", "zip"):
1266 split_enable = (not self.disable_imgdiff and src_ranges.monotonic and
1267 tgt_ranges.monotonic)
1268 if split_enable and (self.tgt.RangeSha1(tgt_ranges) !=
1269 self.src.RangeSha1(src_ranges)):
1270 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1271 return
1272
Tianjie Xu41390d42017-11-22 11:35:18 -08001273 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1274 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001275
Tao Bao08c85832016-09-19 22:26:30 -07001276 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1277 split=False):
1278 """Wrapper function for adding a Transfer()."""
1279
1280 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1281 # otherwise add the Transfer() as is.
1282 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001283 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1284 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1285 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001286 return
1287
1288 # Handle .odex files specially to analyze the block-wise difference. If
1289 # most of the blocks are identical with only few changes (e.g. header),
1290 # we will patch the changed blocks only. This avoids stashing unchanged
1291 # blocks while patching. We limit the analysis to files without size
1292 # changes only. This is to avoid sacrificing the OTA generation cost too
1293 # much.
1294 if (tgt_name.split(".")[-1].lower() == 'odex' and
1295 tgt_ranges.size() == src_ranges.size()):
1296
1297 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1298 # few blocks remain identical, we lose the opportunity to use imgdiff
1299 # that may have better compression ratio than bsdiff.
1300 crop_threshold = 0.5
1301
1302 tgt_skipped = RangeSet()
1303 src_skipped = RangeSet()
1304 tgt_size = tgt_ranges.size()
1305 tgt_changed = 0
1306 for src_block, tgt_block in zip(src_ranges.next_item(),
1307 tgt_ranges.next_item()):
1308 src_rs = RangeSet(str(src_block))
1309 tgt_rs = RangeSet(str(tgt_block))
1310 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1311 tgt_skipped = tgt_skipped.union(tgt_rs)
1312 src_skipped = src_skipped.union(src_rs)
1313 else:
1314 tgt_changed += tgt_rs.size()
1315
1316 # Terminate early if no clear sign of benefits.
1317 if tgt_changed > tgt_size * crop_threshold:
1318 break
1319
1320 if tgt_changed < tgt_size * crop_threshold:
1321 assert tgt_changed + tgt_skipped.size() == tgt_size
1322 print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
1323 tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
Tianjie Xu41390d42017-11-22 11:35:18 -08001324 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001325 "%s-skipped" % (tgt_name,),
1326 "%s-skipped" % (src_name,),
1327 tgt_skipped, src_skipped, style, by_id)
1328
1329 # Intentionally change the file extension to avoid being imgdiff'd as
1330 # the files are no longer in their original format.
1331 tgt_name = "%s-cropped" % (tgt_name,)
1332 src_name = "%s-cropped" % (src_name,)
1333 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1334 src_ranges = src_ranges.subtract(src_skipped)
1335
1336 # Possibly having no changed blocks.
1337 if not tgt_ranges:
1338 return
1339
1340 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001341 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001342 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1343
Tianjie Xu25366072017-09-08 17:19:02 -07001344 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1345 split_info):
1346 """Parse the split_info and return a list of info tuples.
1347
1348 Args:
1349 patch_size: total size of the patch file.
1350 tgt_ranges: Ranges of the target file within the original image.
1351 src_ranges: Ranges of the source file within the original image.
1352 split_info format:
1353 imgdiff version#
1354 count of pieces
1355 <patch_size_1> <tgt_size_1> <src_ranges_1>
1356 ...
1357 <patch_size_n> <tgt_size_n> <src_ranges_n>
1358
1359 Returns:
1360 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1361 """
1362
1363 version = int(split_info[0])
1364 assert version == 2
1365 count = int(split_info[1])
1366 assert len(split_info) - 2 == count
1367
1368 split_info_list = []
1369 patch_start = 0
1370 tgt_remain = copy.deepcopy(tgt_ranges)
1371 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1372 for line in split_info[2:]:
1373 info = line.split()
1374 assert len(info) == 3
1375 patch_length = int(info[0])
1376
1377 split_tgt_size = int(info[1])
1378 assert split_tgt_size % 4096 == 0
1379 assert split_tgt_size / 4096 <= tgt_remain.size()
1380 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1381 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1382
1383 # Find the split_src_ranges within the image file from its relative
1384 # position in file.
1385 split_src_indices = RangeSet.parse_raw(info[2])
1386 split_src_ranges = RangeSet()
1387 for r in split_src_indices:
1388 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1389 assert not split_src_ranges.overlaps(curr_range)
1390 split_src_ranges = split_src_ranges.union(curr_range)
1391
1392 split_info_list.append((patch_start, patch_length,
1393 split_tgt_ranges, split_src_ranges))
1394 patch_start += patch_length
1395
1396 # Check that the sizes of all the split pieces add up to the final file
1397 # size for patch and target.
1398 assert tgt_remain.size() == 0
1399 assert patch_start == patch_size
1400 return split_info_list
1401
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001402 def SplitLargeApks():
1403 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001404
1405 Example: Chrome.apk will be split into
1406 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1407 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1408 ...
1409
1410 After the split, the target pieces are continuous and block aligned; and
1411 the source pieces are mutually exclusive. During the split, we also
1412 generate and save the image patch between src-X & tgt-X. This patch will
1413 be valid because the block ranges of src-X & tgt-X will always stay the
1414 same afterwards; but there's a chance we don't use the patch if we
1415 convert the "diff" command into "new" or "move" later.
Tianjie Xu41390d42017-11-22 11:35:18 -08001416
1417 The split will be attempted by calling imgdiff, which expects the input
1418 files to be valid zip archives. If imgdiff fails for some reason (i.e.
1419 holes in the APK file), we will fall back to split the failed APKs into
1420 fixed size chunks.
Tianjie Xu25366072017-09-08 17:19:02 -07001421 """
1422
1423 while True:
1424 with transfer_lock:
1425 if not large_apks:
1426 return
1427 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1428
1429 src_file = common.MakeTempFile(prefix="src-")
1430 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001431 with open(src_file, "wb") as src_fd:
1432 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1433 with open(tgt_file, "wb") as tgt_fd:
1434 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001435
1436 patch_file = common.MakeTempFile(prefix="patch-")
1437 patch_info_file = common.MakeTempFile(prefix="split_info-")
1438 cmd = ["imgdiff", "-z",
1439 "--block-limit={}".format(max_blocks_per_transfer),
1440 "--split-info=" + patch_info_file,
1441 src_file, tgt_file, patch_file]
1442 p = common.Run(cmd, stdout=subprocess.PIPE)
1443 p.communicate()
Tianjie Xu25366072017-09-08 17:19:02 -07001444 if p.returncode != 0:
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001445 print("Failed to create patch between {} and {},"
1446 " falling back to bsdiff".format(src_name, tgt_name))
1447 with transfer_lock:
Tianjie Xu41390d42017-11-22 11:35:18 -08001448 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name,
1449 tgt_ranges, src_ranges,
1450 "diff", self.transfers)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001451 continue
Tianjie Xu25366072017-09-08 17:19:02 -07001452
1453 with open(patch_info_file) as patch_info:
1454 lines = patch_info.readlines()
1455
1456 patch_size_total = os.path.getsize(patch_file)
1457 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1458 tgt_ranges, src_ranges,
1459 lines)
1460 for index, (patch_start, patch_length, split_tgt_ranges,
1461 split_src_ranges) in enumerate(split_info_list):
1462 with open(patch_file) as f:
1463 f.seek(patch_start)
1464 patch_content = f.read(patch_length)
1465
1466 split_src_name = "{}-{}".format(src_name, index)
1467 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001468 split_large_apks.append((split_tgt_name,
1469 split_src_name,
1470 split_tgt_ranges,
1471 split_src_ranges,
1472 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001473
Tao Bao08c85832016-09-19 22:26:30 -07001474 print("Finding transfers...")
1475
Tianjie Xu25366072017-09-08 17:19:02 -07001476 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001477 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001478 cache_size = common.OPTIONS.cache_size
1479 split_threshold = 0.125
1480 max_blocks_per_transfer = int(cache_size * split_threshold /
1481 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001482 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001483 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001484 if tgt_fn == "__ZERO":
1485 # the special "__ZERO" domain is all the blocks not contained
1486 # in any file and that are filled with zeros. We have a
1487 # special transfer style for zero blocks.
1488 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001489 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1490 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001491 continue
1492
Tao Baoff777812015-05-12 11:42:31 -07001493 elif tgt_fn == "__COPY":
1494 # "__COPY" domain includes all the blocks not contained in any
1495 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001496 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001497 continue
1498
Doug Zongkerfc44a512014-08-26 13:10:25 -07001499 elif tgt_fn in self.src.file_map:
1500 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001501 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001502 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001503 continue
1504
1505 b = os.path.basename(tgt_fn)
1506 if b in self.src_basenames:
1507 # Look for an exact basename match in the source.
1508 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001509 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001510 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001511 continue
1512
1513 b = re.sub("[0-9]+", "#", b)
1514 if b in self.src_numpatterns:
1515 # Look for a 'number pattern' match (a basename match after
1516 # all runs of digits are replaced by "#"). (This is useful
1517 # for .so files that contain version numbers in the filename
1518 # that get bumped.)
1519 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001520 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001521 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001522 continue
1523
Tao Bao9a5caf22015-08-25 15:10:10 -07001524 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001525
Tianjie Xu25366072017-09-08 17:19:02 -07001526 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001527 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001528 for _ in range(self.threads)]
1529 for th in threads:
1530 th.start()
1531 while threads:
1532 threads.pop().join()
1533
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001534 # Sort the split transfers for large apks to generate a determinate package.
1535 split_large_apks.sort()
1536 for (tgt_name, src_name, tgt_ranges, src_ranges,
1537 patch) in split_large_apks:
1538 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1539 self.tgt.RangeSha1(tgt_ranges),
1540 self.src.RangeSha1(src_ranges),
1541 "diff", self.transfers)
1542 transfer_split.patch = patch
1543
Doug Zongkerfc44a512014-08-26 13:10:25 -07001544 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001545 for k in self.src.file_map.keys():
1546 b = os.path.basename(k)
1547 self.src_basenames[b] = k
1548 b = re.sub("[0-9]+", "#", b)
1549 self.src_numpatterns[b] = k
1550
1551 @staticmethod
1552 def AssertPartition(total, seq):
1553 """Assert that all the RangeSets in 'seq' form a partition of the
1554 'total' RangeSet (ie, they are nonintersecting and their union
1555 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001556
Doug Zongkerfc44a512014-08-26 13:10:25 -07001557 so_far = RangeSet()
1558 for i in seq:
1559 assert not so_far.overlaps(i)
1560 so_far = so_far.union(i)
1561 assert so_far == total