blob: 2d20e23fd9188bd9d31523fa24b675bc8e2ab5a0 [file] [log] [blame]
# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import logging
import multiprocessing
import os
import os.path
import re
import sys
import threading
from collections import deque, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet
34
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Tao Bao32fcdab2018-10-12 10:30:39 -070037logger = logging.getLogger(__name__)
38
Dan Albert8b72aef2015-03-23 19:13:21 -070039
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Returns the patch bytes that transform srcfile into tgtfile.

  Runs 'imgdiff -z' (zip-aware) when imgdiff is True, otherwise 'bsdiff',
  and reads back the generated patch file.

  Raises:
    ValueError: If the diff tool exits with a non-zero status.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z', srcfile, tgtfile, patchfile]
  else:
    cmd = ['bsdiff', srcfile, tgtfile, patchfile]

  # Suppress dumping the bsdiff/imgdiff command line; it only contains temp
  # filenames, which are not useful here.
  diff_process = common.Run(cmd, verbose=False)
  stdout, _ = diff_process.communicate()

  if diff_process.returncode != 0:
    raise ValueError(stdout)

  with open(patchfile, 'rb') as f:
    return f.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070056
Dan Albert8b72aef2015-03-23 19:13:21 -070057
class Image(object):
  """Abstract interface for the image objects consumed by BlockImageDiff."""

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data within the given ranges."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data contained in the given ranges."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the whole image."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data within the given ranges to the file object fd."""
    raise NotImplementedError
70
Dan Albert8b72aef2015-03-23 19:13:21 -070071
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # An empty image has nothing to care about, clobber, or extend.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # Every range of an empty image hashes to the SHA-1 of zero bytes.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
97
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070098
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Initializes a DataImage.

    Args:
      data: The string holding the image contents.
      trim: Whether to drop a trailing partial block.
      pad: Whether to zero-fill a trailing partial block. At most one of
          trim and pad may be set.

    Raises:
      ValueError: If data is not a multiple of the block size and neither
          trim nor pad is specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int under Python 3 as
    # well; true division would produce a float and break RangeSet and
    # range() below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    # Classify each (unpadded) block as all-zero or not, building the
    # rangeset pair lists expected by RangeSet(data=...).
    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Generates the data for the given ranges, one blob per range."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the hex SHA-1 of the data within the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns the data in the given ranges as a one-element list."""
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the hex SHA-1 of the image, optionally with clobbered blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data within the given ranges to the file object fd."""
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)
182
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183
class Transfer(object):
  """A single transfer command in the generated transfer list."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # OrderedDict keeps the output repeatable; with a plain dict the
    # ordering would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register ourselves and take the next sequential id.
    self.id = len(by_id)
    by_id.append(self)

    self._patch = None

  @property
  def patch(self):
    """The patch data attached to this transfer, if any."""
    return self._patch

  @patch.setter
  def patch(self, patch):
    if patch:
      # Only diff-style transfers may carry patch data.
      assert self.style == "diff"
    self._patch = patch

  def NetStashChange(self):
    """Returns the net number of blocks this transfer adds to the stash."""
    stashed = sum(sr.size() for _, sr in self.stash_before)
    freed = sum(sr.size() for _, sr in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Rewrites this transfer as a plain 'new' command with no source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch = None

  def __str__(self):
    return "%d: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
232
233
@functools.total_ordering
class HeapItem(object):
  """Wraps an object exposing a 'score' attribute for use in a heapq heap."""

  def __init__(self, item):
    self.item = item
    # heapq implements a min-heap, so store the negated score to make pops
    # return the highest-scoring item first.
    self.score = -item.score

  def clear(self):
    """Drops the wrapped item, making this heap entry falsy."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 spells the truth-value hook __nonzero__.
  __nonzero__ = __bool__

  # functools.total_ordering derives the remaining comparisons from
  # __eq__ and __le__.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
257
258
class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that will be applied imgdiff while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats is only meaningful when imgdiff not being disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason constant to the set of filenames logged under it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for reason in self.REASONS:
      if reason not in self.stats:
        continue
      names = self.stats[reason]
      print_header(' {} (count: {}) '.format(reason, len(names)), '-')
      logger.info(''.join(' {}\n'.format(name) for name in names))
Tao Bao294651d2018-02-08 23:21:52 -0800321
322
Doug Zongkerfc44a512014-08-26 13:10:25 -0700323class BlockImageDiff(object):
Tao Bao76def242017-11-21 09:25:31 -0800324 """Generates the diff of two block image objects.
325
326 BlockImageDiff works on two image objects. An image object is anything that
327 provides the following attributes:
328
329 blocksize: the size in bytes of a block, currently must be 4096.
330
331 total_blocks: the total size of the partition/image, in blocks.
332
333 care_map: a RangeSet containing which blocks (in the range [0,
334 total_blocks) we actually care about; i.e. which blocks contain data.
335
336 file_map: a dict that partitions the blocks contained in care_map into
337 smaller domains that are useful for doing diffs on. (Typically a domain
338 is a file, and the key in file_map is the pathname.)
339
340 clobbered_blocks: a RangeSet containing which blocks contain data but may
341 be altered by the FS. They need to be excluded when verifying the
342 partition integrity.
343
344 ReadRangeSet(): a function that takes a RangeSet and returns the data
345 contained in the image blocks of that RangeSet. The data is returned as
346 a list or tuple of strings; concatenating the elements together should
347 produce the requested data. Implementations are free to break up the
348 data into list/tuple elements in any way that is convenient.
349
350 RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
351 all the data in the specified range.
352
353 TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
354 all the data in the image (ie, all the blocks in the care_map minus
355 clobbered_blocks, or including the clobbered blocks if
356 include_clobbered_blocks is True).
357
358 When creating a BlockImageDiff, the src image may be None, in which case the
359 list of transfers produced will never read from the original image.
360 """
361
Tao Bao293fd132016-06-11 12:19:23 -0700362 def __init__(self, tgt, src=None, threads=None, version=4,
363 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700364 if threads is None:
365 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700366 if threads == 0:
367 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700368 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700369 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700370 self.transfers = []
371 self.src_basenames = {}
372 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800373 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700374 self.touched_src_ranges = RangeSet()
375 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700376 self.disable_imgdiff = disable_imgdiff
Tao Bao294651d2018-02-08 23:21:52 -0800377 self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None
Doug Zongker62338182014-09-08 08:29:55 -0700378
Tao Bao8fad03e2017-03-01 14:36:26 -0800379 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700380
381 self.tgt = tgt
382 if src is None:
383 src = EmptyImage()
384 self.src = src
385
386 # The updater code that installs the patch always uses 4k blocks.
387 assert tgt.blocksize == 4096
388 assert src.blocksize == 4096
389
390 # The range sets in each filemap should comprise a partition of
391 # the care map.
392 self.AssertPartition(src.care_map, src.file_map.values())
393 self.AssertPartition(tgt.care_map, tgt.file_map.values())
394
  @property
  def max_stashed_size(self):
    # Read-only view of the peak stash usage in bytes; populated by
    # WriteTransfers() (max stashed blocks * target block size).
    return self._max_stashed_size
398
Tao Baocb73aed2018-01-31 17:32:40 -0800399 @staticmethod
400 def FileTypeSupportedByImgdiff(filename):
401 """Returns whether the file type is supported by imgdiff."""
402 return filename.lower().endswith(('.apk', '.jar', '.zip'))
403
Tao Bao294651d2018-02-08 23:21:52 -0800404 def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
Tao Baocb73aed2018-01-31 17:32:40 -0800405 """Checks whether we can apply imgdiff for the given RangeSets.
406
407 For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
408 'imgdiff -z' if possible. Because it usually produces significantly smaller
409 patches than bsdiff.
410
411 This is permissible if all of the following conditions hold.
412 - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
413 - The file type is supported by imgdiff;
414 - The source and target blocks are monotonic (i.e. the data is stored with
415 blocks in increasing order);
Tao Baoe709b092018-02-07 12:40:00 -0800416 - Both files don't contain shared blocks;
Tao Bao4ccea852018-02-06 15:16:41 -0800417 - Both files have complete lists of blocks;
Tao Baocb73aed2018-01-31 17:32:40 -0800418 - We haven't removed any blocks from the source set.
419
420 If all these conditions are satisfied, concatenating all the blocks in the
421 RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
422 in the last block). imgdiff is fine with extra zeros at the end of the file.
423
424 Args:
425 name: The filename to be diff'd.
426 tgt_ranges: The target RangeSet.
427 src_ranges: The source RangeSet.
Tao Bao294651d2018-02-08 23:21:52 -0800428 large_apk: Whether this is to split a large APK.
Tao Baocb73aed2018-01-31 17:32:40 -0800429
430 Returns:
431 A boolean result.
432 """
Tao Bao508b0872018-02-09 13:44:43 -0800433 if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
Tao Bao294651d2018-02-08 23:21:52 -0800434 return False
435
436 if not tgt_ranges.monotonic or not src_ranges.monotonic:
437 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
438 return False
439
Tao Baoe709b092018-02-07 12:40:00 -0800440 if (tgt_ranges.extra.get('uses_shared_blocks') or
441 src_ranges.extra.get('uses_shared_blocks')):
442 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
443 return False
444
Tao Bao4ccea852018-02-06 15:16:41 -0800445 if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
446 self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
447 return False
448
Tao Bao294651d2018-02-08 23:21:52 -0800449 reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
450 else ImgdiffStats.USED_IMGDIFF)
451 self.imgdiff_stats.Log(name, reason)
452 return True
Tao Baocb73aed2018-01-31 17:32:40 -0800453
  def Compute(self, prefix):
    """Computes the block diff and writes the output files named by prefix.

    Main entry point: finds the transfers, orders them subject to the stash
    limit, verifies the result, then emits the patch data and the
    <prefix>.transfer.list file.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()
490
Doug Zongkerfc44a512014-08-26 13:10:25 -0700491 def WriteTransfers(self, prefix):
Tianjie Xu37e29302016-06-23 16:10:35 -0700492 def WriteSplitTransfers(out, style, target_blocks):
493 """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.
Tianjie Xubb848c52016-06-21 15:54:09 -0700494
495 This prevents the target size of one command from being too large; and
496 might help to avoid fsync errors on some devices."""
497
Tao Bao3a2e3502016-12-28 09:14:39 -0800498 assert style == "new" or style == "zero"
Tianjie Xu37e29302016-06-23 16:10:35 -0700499 blocks_limit = 1024
Tianjie Xubb848c52016-06-21 15:54:09 -0700500 total = 0
Tianjie Xu37e29302016-06-23 16:10:35 -0700501 while target_blocks:
502 blocks_to_write = target_blocks.first(blocks_limit)
503 out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
504 total += blocks_to_write.size()
505 target_blocks = target_blocks.subtract(blocks_to_write)
Tianjie Xubb848c52016-06-21 15:54:09 -0700506 return total
507
Doug Zongkerfc44a512014-08-26 13:10:25 -0700508 out = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700509 total = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700510
Tao Bao3a2e3502016-12-28 09:14:39 -0800511 # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
512 # id. 'stashes' records the map from 'hash' to the ref count. The stash
513 # will be freed only if the count decrements to zero.
Doug Zongker62338182014-09-08 08:29:55 -0700514 stashes = {}
515 stashed_blocks = 0
516 max_stashed_blocks = 0
517
Doug Zongkerfc44a512014-08-26 13:10:25 -0700518 for xf in self.transfers:
519
Tao Bao8fad03e2017-03-01 14:36:26 -0800520 for _, sr in xf.stash_before:
521 sh = self.src.RangeSha1(sr)
522 if sh in stashes:
523 stashes[sh] += 1
Sami Tolvanendd67a292014-12-09 16:40:34 +0000524 else:
Tao Bao8fad03e2017-03-01 14:36:26 -0800525 stashes[sh] = 1
526 stashed_blocks += sr.size()
527 self.touched_src_ranges = self.touched_src_ranges.union(sr)
528 out.append("stash %s %s\n" % (sh, sr.to_string_raw()))
Doug Zongker62338182014-09-08 08:29:55 -0700529
530 if stashed_blocks > max_stashed_blocks:
531 max_stashed_blocks = stashed_blocks
532
Jesse Zhao7b985f62015-03-02 16:53:08 -0800533 free_string = []
caozhiyuan21b37d82015-10-21 15:14:03 +0800534 free_size = 0
Jesse Zhao7b985f62015-03-02 16:53:08 -0800535
Tao Bao8fad03e2017-03-01 14:36:26 -0800536 # <# blocks> <src ranges>
537 # OR
538 # <# blocks> <src ranges> <src locs> <stash refs...>
539 # OR
540 # <# blocks> - <stash refs...>
Doug Zongker62338182014-09-08 08:29:55 -0700541
Tao Bao8fad03e2017-03-01 14:36:26 -0800542 size = xf.src_ranges.size()
Tao Bao508b0872018-02-09 13:44:43 -0800543 src_str_buffer = [str(size)]
Doug Zongker62338182014-09-08 08:29:55 -0700544
Tao Bao8fad03e2017-03-01 14:36:26 -0800545 unstashed_src_ranges = xf.src_ranges
546 mapped_stashes = []
547 for _, sr in xf.use_stash:
548 unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
549 sh = self.src.RangeSha1(sr)
550 sr = xf.src_ranges.map_within(sr)
551 mapped_stashes.append(sr)
552 assert sh in stashes
Tao Bao508b0872018-02-09 13:44:43 -0800553 src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
Tao Bao8fad03e2017-03-01 14:36:26 -0800554 stashes[sh] -= 1
555 if stashes[sh] == 0:
556 free_string.append("free %s\n" % (sh,))
557 free_size += sr.size()
558 stashes.pop(sh)
Doug Zongker62338182014-09-08 08:29:55 -0700559
Tao Bao8fad03e2017-03-01 14:36:26 -0800560 if unstashed_src_ranges:
Tao Bao508b0872018-02-09 13:44:43 -0800561 src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800562 if xf.use_stash:
563 mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
Tao Bao508b0872018-02-09 13:44:43 -0800564 src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
Tao Bao8fad03e2017-03-01 14:36:26 -0800565 mapped_stashes.append(mapped_unstashed)
Doug Zongker62338182014-09-08 08:29:55 -0700566 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Tao Bao8fad03e2017-03-01 14:36:26 -0800567 else:
Tao Bao508b0872018-02-09 13:44:43 -0800568 src_str_buffer.insert(1, "-")
Tao Bao8fad03e2017-03-01 14:36:26 -0800569 self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
Doug Zongker62338182014-09-08 08:29:55 -0700570
Tao Bao508b0872018-02-09 13:44:43 -0800571 src_str = " ".join(src_str_buffer)
Doug Zongker62338182014-09-08 08:29:55 -0700572
Tao Bao8fad03e2017-03-01 14:36:26 -0800573 # version 3+:
Doug Zongker62338182014-09-08 08:29:55 -0700574 # zero <rangeset>
575 # new <rangeset>
576 # erase <rangeset>
Dan Albert8b72aef2015-03-23 19:13:21 -0700577 # bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
578 # imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
579 # move hash <tgt rangeset> <src_str>
Doug Zongkerfc44a512014-08-26 13:10:25 -0700580
581 tgt_size = xf.tgt_ranges.size()
582
583 if xf.style == "new":
584 assert xf.tgt_ranges
Tianjie Xu37e29302016-06-23 16:10:35 -0700585 assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700586 total += tgt_size
587 elif xf.style == "move":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700588 assert xf.tgt_ranges
589 assert xf.src_ranges.size() == tgt_size
590 if xf.src_ranges != xf.tgt_ranges:
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100591 # take into account automatic stashing of overlapping blocks
592 if xf.src_ranges.overlaps(xf.tgt_ranges):
Tao Baoe9b61912015-07-09 17:37:49 -0700593 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
Sami Tolvanen29f529f2015-04-17 16:28:08 +0100594 if temp_stash_usage > max_stashed_blocks:
595 max_stashed_blocks = temp_stash_usage
596
Tao Baod522bdc2016-04-12 15:53:16 -0700597 self.touched_src_ranges = self.touched_src_ranges.union(
598 xf.src_ranges)
599
Tao Bao8fad03e2017-03-01 14:36:26 -0800600 out.append("%s %s %s %s\n" % (
Sami Tolvanendd67a292014-12-09 16:40:34 +0000601 xf.style,
Tao Bao183e56e2017-03-05 17:05:09 -0800602 xf.tgt_sha1,
Dan Albert8b72aef2015-03-23 19:13:21 -0700603 xf.tgt_ranges.to_string_raw(), src_str))
Tao Bao8fad03e2017-03-01 14:36:26 -0800604 total += tgt_size
605 elif xf.style in ("bsdiff", "imgdiff"):
606 assert xf.tgt_ranges
607 assert xf.src_ranges
608 # take into account automatic stashing of overlapping blocks
609 if xf.src_ranges.overlaps(xf.tgt_ranges):
610 temp_stash_usage = stashed_blocks + xf.src_ranges.size()
611 if temp_stash_usage > max_stashed_blocks:
612 max_stashed_blocks = temp_stash_usage
613
614 self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)
615
616 out.append("%s %d %d %s %s %s %s\n" % (
617 xf.style,
618 xf.patch_start, xf.patch_len,
619 xf.src_sha1,
620 xf.tgt_sha1,
621 xf.tgt_ranges.to_string_raw(), src_str))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700622 total += tgt_size
623 elif xf.style == "zero":
624 assert xf.tgt_ranges
625 to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
Tianjie Xu37e29302016-06-23 16:10:35 -0700626 assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
Tianjie Xubb848c52016-06-21 15:54:09 -0700627 total += to_zero.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700628 else:
Dan Albert8b72aef2015-03-23 19:13:21 -0700629 raise ValueError("unknown transfer style '%s'\n" % xf.style)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700630
Sami Tolvanendd67a292014-12-09 16:40:34 +0000631 if free_string:
632 out.append("".join(free_string))
caozhiyuan21b37d82015-10-21 15:14:03 +0800633 stashed_blocks -= free_size
Sami Tolvanendd67a292014-12-09 16:40:34 +0000634
Tao Bao8fad03e2017-03-01 14:36:26 -0800635 if common.OPTIONS.cache_size is not None:
Tao Bao8dcf7382015-05-21 14:09:49 -0700636 # Sanity check: abort if we're going to need more stash space than
637 # the allowed size (cache_size * threshold). There are two purposes
638 # of having a threshold here. a) Part of the cache may have been
639 # occupied by some recovery logs. b) It will buy us some time to deal
640 # with the oversize issue.
641 cache_size = common.OPTIONS.cache_size
642 stash_threshold = common.OPTIONS.stash_threshold
643 max_allowed = cache_size * stash_threshold
Tao Baoe8c68a02017-02-26 10:48:11 -0800644 assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
Tao Bao8dcf7382015-05-21 14:09:49 -0700645 'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
646 max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
647 self.tgt.blocksize, max_allowed, cache_size,
648 stash_threshold)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700649
Tao Bao8fad03e2017-03-01 14:36:26 -0800650 self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)
Tao Baod522bdc2016-04-12 15:53:16 -0700651
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700652 if self.tgt.hashtree_info:
653 out.append("compute_hash_tree {} {} {} {} {}\n".format(
654 self.tgt.hashtree_info.hashtree_range.to_string_raw(),
655 self.tgt.hashtree_info.filesystem_range.to_string_raw(),
656 self.tgt.hashtree_info.hash_algorithm,
657 self.tgt.hashtree_info.salt,
658 self.tgt.hashtree_info.root_hash))
659
Tao Baoe9b61912015-07-09 17:37:49 -0700660 # Zero out extended blocks as a workaround for bug 20881595.
661 if self.tgt.extended:
Tianjie Xu37e29302016-06-23 16:10:35 -0700662 assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
Tianjie Xubb848c52016-06-21 15:54:09 -0700663 self.tgt.extended.size())
Tao Baob32d56e2015-09-09 11:55:01 -0700664 total += self.tgt.extended.size()
Tao Baoe9b61912015-07-09 17:37:49 -0700665
666 # We erase all the blocks on the partition that a) don't contain useful
Tao Bao66f1fa62016-05-03 10:02:01 -0700667 # data in the new image; b) will not be touched by dm-verity. Out of those
668 # blocks, we erase the ones that won't be used in this update at the
669 # beginning of an update. The rest would be erased at the end. This is to
670 # work around the eMMC issue observed on some devices, which may otherwise
671 # get starving for clean blocks and thus fail the update. (b/28347095)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700672 all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
Tao Baoe9b61912015-07-09 17:37:49 -0700673 all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
674 new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
Tao Bao66f1fa62016-05-03 10:02:01 -0700675
676 erase_first = new_dontcare.subtract(self.touched_src_ranges)
677 if erase_first:
678 out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))
679
680 erase_last = new_dontcare.subtract(erase_first)
681 if erase_last:
682 out.append("erase %s\n" % (erase_last.to_string_raw(),))
Doug Zongkere985f6f2014-09-09 12:38:47 -0700683
684 out.insert(0, "%d\n" % (self.version,)) # format version number
Tao Baob32d56e2015-09-09 11:55:01 -0700685 out.insert(1, "%d\n" % (total,))
Tao Bao8fad03e2017-03-01 14:36:26 -0800686 # v3+: the number of stash slots is unused.
687 out.insert(2, "0\n")
688 out.insert(3, str(max_stashed_blocks) + "\n")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700689
690 with open(prefix + ".transfer.list", "wb") as f:
691 for i in out:
692 f.write(i)
693
Tao Bao8fad03e2017-03-01 14:36:26 -0800694 self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
695 OPTIONS = common.OPTIONS
696 if OPTIONS.cache_size is not None:
697 max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
Tao Bao32fcdab2018-10-12 10:30:39 -0700698 logger.info(
699 "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
700 max_stashed_blocks, self._max_stashed_size, max_allowed,
701 self._max_stashed_size * 100.0 / max_allowed)
Tao Bao8fad03e2017-03-01 14:36:26 -0800702 else:
Tao Bao32fcdab2018-10-12 10:30:39 -0700703 logger.info(
704 "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
705 max_stashed_blocks, self._max_stashed_size)
Doug Zongker62338182014-09-08 08:29:55 -0700706
  def ReviseStashSize(self):
    """Revises the transfer list so stash usage fits within the cache limit.

    Walks self.transfers in execution order, simulating the number of blocks
    stashed at any point (both explicit stashes from stash_before and the
    implicit stashes of overlapping "diff" commands). Whenever a command would
    push the stash usage past cache_size * stash_threshold, the command that
    consumes the offending stash is converted to a "new" command instead,
    trading package size for cache space.

    Returns:
      The total number of target blocks that were converted to "new" due to
      insufficient cache space.
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Only a new (not yet stashed) SHA-1 consumes additional space;
          # identical blocks share one stash slot in BBOTA v3+.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d  %9s  %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            logger.info("%10d  %9s  %s", xf.src_ranges.size(), "implicit", xf)

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last consumer of this stash slot; its blocks are freed.
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size.", new_blocks, num_of_bytes)
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700799
Doug Zongkerfc44a512014-08-26 13:10:25 -0700800 def ComputePatches(self, prefix):
Tao Bao32fcdab2018-10-12 10:30:39 -0700801 logger.info("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800802 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700803 patch_num = 0
804 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800805 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700806 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700807 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700808 logger.info(
809 "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
810 xf.style, xf.tgt_name, str(xf.tgt_ranges))
Tao Bao08c85832016-09-19 22:26:30 -0700811
Doug Zongkerfc44a512014-08-26 13:10:25 -0700812 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800813 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700814 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao32fcdab2018-10-12 10:30:39 -0700815 logger.info(
816 "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
817 xf.style, xf.tgt_name, str(xf.tgt_ranges))
Tao Bao08c85832016-09-19 22:26:30 -0700818
Doug Zongkerfc44a512014-08-26 13:10:25 -0700819 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700820 # We can't compare src and tgt directly because they may have
821 # the same content but be broken up into blocks differently, eg:
822 #
823 # ["he", "llo"] vs ["h", "ello"]
824 #
825 # We want those to compare equal, ideally without having to
826 # actually concatenate the strings (these may be tens of
827 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800828 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700829 # These are identical; we don't need to generate a patch,
830 # just issue copy commands on the device.
831 xf.style = "move"
Tianjie Xu25366072017-09-08 17:19:02 -0700832 xf.patch = None
Tao Bao183e56e2017-03-05 17:05:09 -0800833 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700834 if xf.src_ranges != xf.tgt_ranges:
Tao Bao32fcdab2018-10-12 10:30:39 -0700835 logger.info(
836 "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,
837 100.0, xf.style,
Tao Bao08c85832016-09-19 22:26:30 -0700838 xf.tgt_name if xf.tgt_name == xf.src_name else (
839 xf.tgt_name + " (from " + xf.src_name + ")"),
Tao Bao32fcdab2018-10-12 10:30:39 -0700840 str(xf.tgt_ranges), str(xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700841 else:
Tianjie Xu25366072017-09-08 17:19:02 -0700842 if xf.patch:
Tao Bao5bab0dd2018-07-10 17:44:52 -0700843 # We have already generated the patch with imgdiff, while
844 # splitting large APKs (i.e. in FindTransfers()).
Tianjie Xu25366072017-09-08 17:19:02 -0700845 assert not self.disable_imgdiff
846 imgdiff = True
Tianjie Xu25366072017-09-08 17:19:02 -0700847 else:
Tao Baocb73aed2018-01-31 17:32:40 -0800848 imgdiff = self.CanUseImgdiff(
849 xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700850 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800851 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700852 patch_num += 1
853
854 else:
855 assert False, "unknown style " + xf.style
856
Tao Bao183e56e2017-03-05 17:05:09 -0800857 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700858 if self.threads > 1:
Tao Bao32fcdab2018-10-12 10:30:39 -0700859 logger.info("Computing patches (using %d threads)...", self.threads)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700860 else:
Tao Bao32fcdab2018-10-12 10:30:39 -0700861 logger.info("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700862
Tao Bao183e56e2017-03-05 17:05:09 -0800863 diff_total = len(diff_queue)
864 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700865 error_messages = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700866
Tao Bao183e56e2017-03-05 17:05:09 -0800867 # Using multiprocessing doesn't give additional benefits, due to the
868 # pattern of the code. The diffing work is done by subprocess.call, which
869 # already runs in a separate process (not affected much by the GIL -
870 # Global Interpreter Lock). Using multiprocess also requires either a)
871 # writing the diff input files in the main process before forking, or b)
872 # reopening the image file (SparseImage) in the worker processes. Doing
873 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700874 lock = threading.Lock()
875 def diff_worker():
876 while True:
877 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800878 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700879 return
Tao Bao183e56e2017-03-05 17:05:09 -0800880 xf_index, imgdiff, patch_index = diff_queue.pop()
Tao Bao97395142018-02-12 12:08:05 -0800881 xf = self.transfers[xf_index]
Tao Bao183e56e2017-03-05 17:05:09 -0800882
Tianjie Xu25366072017-09-08 17:19:02 -0700883 patch = xf.patch
884 if not patch:
885 src_ranges = xf.src_ranges
886 tgt_ranges = xf.tgt_ranges
Tao Bao183e56e2017-03-05 17:05:09 -0800887
Tianjie Xudf1166e2018-01-27 17:35:41 -0800888 src_file = common.MakeTempFile(prefix="src-")
889 with open(src_file, "wb") as fd:
890 self.src.WriteRangeDataToFd(src_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700891
Tianjie Xudf1166e2018-01-27 17:35:41 -0800892 tgt_file = common.MakeTempFile(prefix="tgt-")
893 with open(tgt_file, "wb") as fd:
894 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
Tianjie Xu25366072017-09-08 17:19:02 -0700895
896 message = []
897 try:
898 patch = compute_patch(src_file, tgt_file, imgdiff)
899 except ValueError as e:
900 message.append(
901 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
Tao Bao508b0872018-02-09 13:44:43 -0800902 "imgdiff" if imgdiff else "bsdiff",
903 xf.tgt_name if xf.tgt_name == xf.src_name else
Tianjie Xu25366072017-09-08 17:19:02 -0700904 xf.tgt_name + " (from " + xf.src_name + ")",
Tao Bao508b0872018-02-09 13:44:43 -0800905 xf.tgt_ranges, xf.src_ranges, e.message))
Tianjie Xu25366072017-09-08 17:19:02 -0700906 if message:
907 with lock:
908 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800909
910 with lock:
911 patches[patch_index] = (xf_index, patch)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700912
913 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700914 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700915 for th in threads:
916 th.start()
917 while threads:
918 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800919
Tianjie Xub59c17f2016-10-28 17:55:53 -0700920 if error_messages:
Tao Bao32fcdab2018-10-12 10:30:39 -0700921 logger.error('ERROR:')
922 logger.error('\n'.join(error_messages))
923 logger.error('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700924 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700925 else:
926 patches = []
927
Tao Bao183e56e2017-03-05 17:05:09 -0800928 offset = 0
929 with open(prefix + ".patch.dat", "wb") as patch_fd:
930 for index, patch in patches:
931 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700932 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800933 xf.patch_start = offset
934 offset += xf.patch_len
935 patch_fd.write(patch)
936
Tao Bao32fcdab2018-10-12 10:30:39 -0700937 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
938 logger.info(
939 "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
940 xf.patch_len * 100.0 / tgt_size, xf.style,
941 xf.tgt_name if xf.tgt_name == xf.src_name else (
942 xf.tgt_name + " (from " + xf.src_name + ")"),
943 xf.tgt_ranges, xf.src_ranges)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700944
Tianjie Xu8a7ed9f2018-01-23 14:06:11 -0800945 def AssertSha1Good(self):
946 """Check the SHA-1 of the src & tgt blocks in the transfer list.
947
948 Double check the SHA-1 value to avoid the issue in b/71908713, where
949 SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
950 environment. That specific problem has been fixed by protecting the
951 underlying generator function 'SparseImage._GetRangeData()' with lock.
952 """
953 for xf in self.transfers:
954 tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
955 assert xf.tgt_sha1 == tgt_sha1
956 if xf.style == "diff":
957 src_sha1 = self.src.RangeSha1(xf.src_ranges)
958 assert xf.src_sha1 == src_sha1
959
Doug Zongkerfc44a512014-08-26 13:10:25 -0700960 def AssertSequenceGood(self):
961 # Simulate the sequences of transfers we will output, and check that:
962 # - we never read a block after writing it, and
963 # - we write every block we care about exactly once.
964
965 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800966 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967
968 # Imagine processing the transfers in order.
969 for xf in self.transfers:
970 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700971
972 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800973 for _, sr in xf.use_stash:
974 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700975
Doug Zongker6ab2a502016-02-09 08:28:09 -0800976 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800977 # Source image could be larger. Don't check the blocks that are in the
978 # source image only. Since they are not in 'touched', and won't ever
979 # be touched.
980 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800981 assert touched[i] == 0
982
983 # Check that the output blocks for this transfer haven't yet
984 # been touched, and touch all the blocks written by this
985 # transfer.
986 for s, e in xf.tgt_ranges:
987 for i in range(s, e):
988 assert touched[i] == 0
989 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700990
Tianjie Xu67c7cbb2018-08-30 00:32:07 -0700991 if self.tgt.hashtree_info:
992 for s, e in self.tgt.hashtree_info.hashtree_range:
993 for i in range(s, e):
994 assert touched[i] == 0
995 touched[i] = 1
996
Doug Zongkerfc44a512014-08-26 13:10:25 -0700997 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800998 for s, e in self.tgt.care_map:
999 for i in range(s, e):
1000 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001001
Doug Zongker62338182014-09-08 08:29:55 -07001002 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001003 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -07001004
1005 # At this point our digraph is acyclic; we reversed any edges that
1006 # were backwards in the heuristically-generated sequence. The
1007 # previously-generated order is still acceptable, but we hope to
1008 # find a better order that needs less memory for stashed data.
1009 # Now we do a topological sort to generate a new vertex order,
1010 # using a greedy algorithm to choose which vertex goes next
1011 # whenever we have a choice.
1012
1013 # Make a copy of the edge set; this copy will get destroyed by the
1014 # algorithm.
1015 for xf in self.transfers:
1016 xf.incoming = xf.goes_after.copy()
1017 xf.outgoing = xf.goes_before.copy()
1018
1019 L = [] # the new vertex order
1020
1021 # S is the set of sources in the remaining graph; we always choose
1022 # the one that leaves the least amount of stashed data after it's
1023 # executed.
1024 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1025 if not u.incoming]
1026 heapq.heapify(S)
1027
1028 while S:
1029 _, _, xf = heapq.heappop(S)
1030 L.append(xf)
1031 for u in xf.outgoing:
1032 del u.incoming[xf]
1033 if not u.incoming:
1034 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1035
1036 # if this fails then our graph had a cycle.
1037 assert len(L) == len(self.transfers)
1038
1039 self.transfers = L
1040 for i, xf in enumerate(L):
1041 xf.order = i
1042
Doug Zongker62338182014-09-08 08:29:55 -07001043 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001044 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1045
1046 For each transfer, make sure it properly stashes the blocks it touches and
1047 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1048 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1049 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1050 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1051 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1052 blocks will be written to the same stash slot in WriteTransfers().
1053 """
1054
Tao Bao32fcdab2018-10-12 10:30:39 -07001055 logger.info("Reversing backward edges...")
Doug Zongker62338182014-09-08 08:29:55 -07001056 in_order = 0
1057 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001058 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001059 stash_size = 0
1060
1061 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001062 for u in xf.goes_before.copy():
1063 # xf should go before u
1064 if xf.order < u.order:
1065 # it does, hurray!
1066 in_order += 1
1067 else:
1068 # it doesn't, boo. modify u to stash the blocks that it
1069 # writes that xf wants to read, and then require u to go
1070 # before xf.
1071 out_of_order += 1
1072
1073 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1074 assert overlap
1075
Tao Bao3a2e3502016-12-28 09:14:39 -08001076 u.stash_before.append((stash_raw_id, overlap))
1077 xf.use_stash.append((stash_raw_id, overlap))
1078 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001079 stash_size += overlap.size()
1080
1081 # reverse the edge direction; now xf must go after u
1082 del xf.goes_before[u]
1083 del u.goes_after[xf]
1084 xf.goes_after[u] = None # value doesn't matter
1085 u.goes_before[xf] = None
1086
Tao Bao32fcdab2018-10-12 10:30:39 -07001087 logger.info(
1088 " %d/%d dependencies (%.2f%%) were violated; %d source blocks "
1089 "stashed.", out_of_order, in_order + out_of_order,
1090 (out_of_order * 100.0 / (in_order + out_of_order)) if (
1091 in_order + out_of_order) else 0.0,
1092 stash_size)
Doug Zongker62338182014-09-08 08:29:55 -07001093
  def FindVertexSequence(self):
    """Orders self.transfers to heuristically minimize lost source blocks.

    Produces a linear order of the (possibly cyclic) transfer digraph and
    records it in each transfer's 'order' field, rearranging self.transfers
    accordingly. Edges left pointing "backwards" by this order will be
    reversed later (losing their weight in source blocks), so the heuristic
    tries to minimize the total weight of such edges.
    """
    logger.info("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # Net benefit of treating xf as a source rather than a sink.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Scores can't be updated in place in a heap, so invalidate the old
      # entry (HeapItem.clear() marks it stale) and push a fresh one; stale
      # entries are skipped when popped below.
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      # Pop until we find a live heap entry (cleared entries are falsy)
      # whose vertex is still in G.
      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1206
1207 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001208 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001209
1210 # Each item of source_ranges will be:
1211 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001212 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001213 source_ranges = []
1214 for b in self.transfers:
1215 for s, e in b.src_ranges:
1216 if e > len(source_ranges):
1217 source_ranges.extend([None] * (e-len(source_ranges)))
1218 for i in range(s, e):
1219 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001220 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001221 else:
Tao Bao33482282016-10-24 16:49:08 -07001222 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001223
Doug Zongkerfc44a512014-08-26 13:10:25 -07001224 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001225 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001226 for s, e in a.tgt_ranges:
1227 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001228 if i >= len(source_ranges):
1229 break
Tao Bao33482282016-10-24 16:49:08 -07001230 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1231 if source_ranges[i] is not None:
1232 for j in source_ranges[i]:
1233 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001234
1235 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001236 if a is b:
1237 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001238
1239 # If the blocks written by A are read by B, then B needs to go before A.
1240 i = a.tgt_ranges.intersect(b.src_ranges)
1241 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001242 if b.src_name == "__ZERO":
1243 # the cost of removing source blocks for the __ZERO domain
1244 # is (nearly) zero.
1245 size = 0
1246 else:
1247 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001248 b.goes_before[a] = size
1249 a.goes_after[b] = size
1250
1251 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001252 """Parse the file_map to generate all the transfers."""
1253
Tianjie Xu41390d42017-11-22 11:35:18 -08001254 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1255 src_ranges, style, by_id):
1256 """Add one or multiple Transfer()s by splitting large files.
1257
1258 For BBOTA v3, we need to stash source blocks for resumable feature.
1259 However, with the growth of file size and the shrink of the cache
1260 partition source blocks are too large to be stashed. If a file occupies
1261 too many blocks, we split it into smaller pieces by getting multiple
1262 Transfer()s.
1263
1264 The downside is that after splitting, we may increase the package size
1265 since the split pieces don't align well. According to our experiments,
1266 1/8 of the cache size as the per-piece limit appears to be optimal.
1267 Compared to the fixed 1024-block limit, it reduces the overall package
1268 size by 30% for volantis, and 20% for angler and bullhead."""
1269
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001270 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001271 while (tgt_ranges.size() > max_blocks_per_transfer and
1272 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001273 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1274 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001275 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1276 src_first = src_ranges.first(max_blocks_per_transfer)
1277
Tao Bao183e56e2017-03-05 17:05:09 -08001278 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1279 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1280 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001281
1282 tgt_ranges = tgt_ranges.subtract(tgt_first)
1283 src_ranges = src_ranges.subtract(src_first)
1284 pieces += 1
1285
1286 # Handle remaining blocks.
1287 if tgt_ranges.size() or src_ranges.size():
1288 # Must be both non-empty.
1289 assert tgt_ranges.size() and src_ranges.size()
1290 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1291 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001292 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1293 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1294 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001295
Tianjie Xu41390d42017-11-22 11:35:18 -08001296 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1297 by_id):
1298 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001299
Tianjie Xu41390d42017-11-22 11:35:18 -08001300 This function will construct a list of zip archives, which will later be
1301 split by imgdiff to reduce the final patch size. For the other files,
1302 we will plainly split them based on a fixed chunk size with the potential
1303 patch size penalty.
1304 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001305
1306 assert style == "diff"
1307
1308 # Change nothing for small files.
1309 if (tgt_ranges.size() <= max_blocks_per_transfer and
1310 src_ranges.size() <= max_blocks_per_transfer):
1311 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1312 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1313 style, by_id)
1314 return
1315
Tao Baocb73aed2018-01-31 17:32:40 -08001316 # Split large APKs with imgdiff, if possible. We're intentionally checking
1317 # file types one more time (CanUseImgdiff() checks that as well), before
1318 # calling the costly RangeSha1()s.
1319 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1320 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001321 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001322 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1323 return
1324
Tianjie Xu41390d42017-11-22 11:35:18 -08001325 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1326 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001327
Tao Bao08c85832016-09-19 22:26:30 -07001328 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1329 split=False):
1330 """Wrapper function for adding a Transfer()."""
1331
1332 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1333 # otherwise add the Transfer() as is.
1334 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001335 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1336 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1337 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001338 return
1339
1340 # Handle .odex files specially to analyze the block-wise difference. If
1341 # most of the blocks are identical with only few changes (e.g. header),
1342 # we will patch the changed blocks only. This avoids stashing unchanged
1343 # blocks while patching. We limit the analysis to files without size
1344 # changes only. This is to avoid sacrificing the OTA generation cost too
1345 # much.
1346 if (tgt_name.split(".")[-1].lower() == 'odex' and
1347 tgt_ranges.size() == src_ranges.size()):
1348
1349 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1350 # few blocks remain identical, we lose the opportunity to use imgdiff
1351 # that may have better compression ratio than bsdiff.
1352 crop_threshold = 0.5
1353
1354 tgt_skipped = RangeSet()
1355 src_skipped = RangeSet()
1356 tgt_size = tgt_ranges.size()
1357 tgt_changed = 0
1358 for src_block, tgt_block in zip(src_ranges.next_item(),
1359 tgt_ranges.next_item()):
1360 src_rs = RangeSet(str(src_block))
1361 tgt_rs = RangeSet(str(tgt_block))
1362 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1363 tgt_skipped = tgt_skipped.union(tgt_rs)
1364 src_skipped = src_skipped.union(src_rs)
1365 else:
1366 tgt_changed += tgt_rs.size()
1367
1368 # Terminate early if no clear sign of benefits.
1369 if tgt_changed > tgt_size * crop_threshold:
1370 break
1371
1372 if tgt_changed < tgt_size * crop_threshold:
1373 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001374 logger.info(
1375 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1376 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001377 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001378 "%s-skipped" % (tgt_name,),
1379 "%s-skipped" % (src_name,),
1380 tgt_skipped, src_skipped, style, by_id)
1381
1382 # Intentionally change the file extension to avoid being imgdiff'd as
1383 # the files are no longer in their original format.
1384 tgt_name = "%s-cropped" % (tgt_name,)
1385 src_name = "%s-cropped" % (src_name,)
1386 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1387 src_ranges = src_ranges.subtract(src_skipped)
1388
1389 # Possibly having no changed blocks.
1390 if not tgt_ranges:
1391 return
1392
1393 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001394 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001395 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1396
Tianjie Xu25366072017-09-08 17:19:02 -07001397 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1398 split_info):
1399 """Parse the split_info and return a list of info tuples.
1400
1401 Args:
1402 patch_size: total size of the patch file.
1403 tgt_ranges: Ranges of the target file within the original image.
1404 src_ranges: Ranges of the source file within the original image.
1405 split_info format:
1406 imgdiff version#
1407 count of pieces
1408 <patch_size_1> <tgt_size_1> <src_ranges_1>
1409 ...
1410 <patch_size_n> <tgt_size_n> <src_ranges_n>
1411
1412 Returns:
1413 [patch_start, patch_len, split_tgt_ranges, split_src_ranges]
1414 """
1415
1416 version = int(split_info[0])
1417 assert version == 2
1418 count = int(split_info[1])
1419 assert len(split_info) - 2 == count
1420
1421 split_info_list = []
1422 patch_start = 0
1423 tgt_remain = copy.deepcopy(tgt_ranges)
1424 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1425 for line in split_info[2:]:
1426 info = line.split()
1427 assert len(info) == 3
1428 patch_length = int(info[0])
1429
1430 split_tgt_size = int(info[1])
1431 assert split_tgt_size % 4096 == 0
1432 assert split_tgt_size / 4096 <= tgt_remain.size()
1433 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1434 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1435
1436 # Find the split_src_ranges within the image file from its relative
1437 # position in file.
1438 split_src_indices = RangeSet.parse_raw(info[2])
1439 split_src_ranges = RangeSet()
1440 for r in split_src_indices:
1441 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1442 assert not split_src_ranges.overlaps(curr_range)
1443 split_src_ranges = split_src_ranges.union(curr_range)
1444
1445 split_info_list.append((patch_start, patch_length,
1446 split_tgt_ranges, split_src_ranges))
1447 patch_start += patch_length
1448
1449 # Check that the sizes of all the split pieces add up to the final file
1450 # size for patch and target.
1451 assert tgt_remain.size() == 0
1452 assert patch_start == patch_size
1453 return split_info_list
1454
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001455 def SplitLargeApks():
1456 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001457
1458 Example: Chrome.apk will be split into
1459 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1460 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1461 ...
1462
1463 After the split, the target pieces are continuous and block aligned; and
1464 the source pieces are mutually exclusive. During the split, we also
1465 generate and save the image patch between src-X & tgt-X. This patch will
1466 be valid because the block ranges of src-X & tgt-X will always stay the
1467 same afterwards; but there's a chance we don't use the patch if we
1468 convert the "diff" command into "new" or "move" later.
1469 """
1470
1471 while True:
1472 with transfer_lock:
1473 if not large_apks:
1474 return
1475 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1476
1477 src_file = common.MakeTempFile(prefix="src-")
1478 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001479 with open(src_file, "wb") as src_fd:
1480 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1481 with open(tgt_file, "wb") as tgt_fd:
1482 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001483
1484 patch_file = common.MakeTempFile(prefix="patch-")
1485 patch_info_file = common.MakeTempFile(prefix="split_info-")
1486 cmd = ["imgdiff", "-z",
1487 "--block-limit={}".format(max_blocks_per_transfer),
1488 "--split-info=" + patch_info_file,
1489 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001490 proc = common.Run(cmd)
1491 imgdiff_output, _ = proc.communicate()
1492 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001493 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1494 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001495
1496 with open(patch_info_file) as patch_info:
1497 lines = patch_info.readlines()
1498
1499 patch_size_total = os.path.getsize(patch_file)
1500 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1501 tgt_ranges, src_ranges,
1502 lines)
1503 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001504 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001505 with open(patch_file) as f:
1506 f.seek(patch_start)
1507 patch_content = f.read(patch_length)
1508
1509 split_src_name = "{}-{}".format(src_name, index)
1510 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001511 split_large_apks.append((split_tgt_name,
1512 split_src_name,
1513 split_tgt_ranges,
1514 split_src_ranges,
1515 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001516
Tao Bao32fcdab2018-10-12 10:30:39 -07001517 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001518
Tianjie Xu25366072017-09-08 17:19:02 -07001519 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001520 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001521 cache_size = common.OPTIONS.cache_size
1522 split_threshold = 0.125
1523 max_blocks_per_transfer = int(cache_size * split_threshold /
1524 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001525 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001526 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001527 if tgt_fn == "__ZERO":
1528 # the special "__ZERO" domain is all the blocks not contained
1529 # in any file and that are filled with zeros. We have a
1530 # special transfer style for zero blocks.
1531 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001532 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1533 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001534 continue
1535
Tao Baoff777812015-05-12 11:42:31 -07001536 elif tgt_fn == "__COPY":
1537 # "__COPY" domain includes all the blocks not contained in any
1538 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001539 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001540 continue
1541
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001542 elif tgt_fn == "__HASHTREE":
1543 continue
1544
Doug Zongkerfc44a512014-08-26 13:10:25 -07001545 elif tgt_fn in self.src.file_map:
1546 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001547 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001548 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001549 continue
1550
1551 b = os.path.basename(tgt_fn)
1552 if b in self.src_basenames:
1553 # Look for an exact basename match in the source.
1554 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001555 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001556 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001557 continue
1558
1559 b = re.sub("[0-9]+", "#", b)
1560 if b in self.src_numpatterns:
1561 # Look for a 'number pattern' match (a basename match after
1562 # all runs of digits are replaced by "#"). (This is useful
1563 # for .so files that contain version numbers in the filename
1564 # that get bumped.)
1565 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001566 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001567 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001568 continue
1569
Tao Bao9a5caf22015-08-25 15:10:10 -07001570 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001571
Tianjie Xu25366072017-09-08 17:19:02 -07001572 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001573 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001574 for _ in range(self.threads)]
1575 for th in threads:
1576 th.start()
1577 while threads:
1578 threads.pop().join()
1579
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001580 # Sort the split transfers for large apks to generate a determinate package.
1581 split_large_apks.sort()
1582 for (tgt_name, src_name, tgt_ranges, src_ranges,
1583 patch) in split_large_apks:
1584 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1585 self.tgt.RangeSha1(tgt_ranges),
1586 self.src.RangeSha1(src_ranges),
1587 "diff", self.transfers)
1588 transfer_split.patch = patch
1589
Doug Zongkerfc44a512014-08-26 13:10:25 -07001590 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001591 for k in self.src.file_map.keys():
1592 b = os.path.basename(k)
1593 self.src_basenames[b] = k
1594 b = re.sub("[0-9]+", "#", b)
1595 self.src_numpatterns[b] = k
1596
1597 @staticmethod
1598 def AssertPartition(total, seq):
1599 """Assert that all the RangeSets in 'seq' form a partition of the
1600 'total' RangeSet (ie, they are nonintersecting and their union
1601 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001602
Doug Zongkerfc44a512014-08-26 13:10:25 -07001603 so_far = RangeSet()
1604 for i in seq:
1605 assert not so_far.overlaps(i)
1606 so_far = so_far.union(i)
1607 assert so_far == total