Blame - tools/releasetools/blockimgdiff.py - android_build

2014-08-26 13:10:25 -0700

[diff] [blame]

15

from __future__ import print_function

16

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

17

import array

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

18

import copy

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

19

import functools

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

20

import heapq

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

21

import itertools

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

22

import logging

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

23

import multiprocessing

24

import os

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

25

import os.path

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

26

import re

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

27

import sys

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

28

import threading

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

29

import zlib

30

from collections import deque, namedtuple, OrderedDict

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

31

from hashlib import sha1

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

32

33

import common

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

34

from rangelib import RangeSet

35

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

36

__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

37

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

38

logger = logging.getLogger(__name__)

39

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

40

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.

41

PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])

42

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

43

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

44

def compute_patch(srcfile, tgtfile, imgdiff=False):

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

45

"""Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""

Tianjie Xu

b59c17f

2016-10-28 17:55:53 -0700

[diff] [blame]

46

patchfile = common.MakeTempFile(prefix='patch-')

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

47

Tianjie Xu

b59c17f

2016-10-28 17:55:53 -0700

[diff] [blame]

48

cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']

49

cmd.extend([srcfile, tgtfile, patchfile])

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

50

Tao Bao

3945158

2017-05-04 11:10:47 -0700

[diff] [blame]

51

# Don't dump the bsdiff/imgdiff commands, which are not useful for the case

52

# here, since they contain temp filenames only.

Tao Bao

73dd4f4

2018-10-04 16:25:33 -0700

[diff] [blame]

53

proc = common.Run(cmd, verbose=False)

54

output, _ = proc.communicate()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

55

Tao Bao

73dd4f4

2018-10-04 16:25:33 -0700

[diff] [blame]

56

if proc.returncode != 0:

Tianjie Xu

b59c17f

2016-10-28 17:55:53 -0700

[diff] [blame]

57

raise ValueError(output)

58

59

with open(patchfile, 'rb') as f:

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

60

return PatchInfo(imgdiff, f.read())

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

61

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

62

63

class Image(object):

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

64

def RangeSha1(self, ranges):

65

raise NotImplementedError

66

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

67

def ReadRangeSet(self, ranges):

68

raise NotImplementedError

69

Tao Bao

68658c0

2015-06-01 13:40:49 -0700

[diff] [blame]

70

def TotalSha1(self, include_clobbered_blocks=False):

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

71

raise NotImplementedError

72

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

73

def WriteRangeDataToFd(self, ranges, fd):

74

raise NotImplementedError

75

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

76

77

class EmptyImage(Image):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

78

"""A zero-length image."""

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

79

80

def __init__(self):

81

self.blocksize = 4096

82

self.care_map = RangeSet()

83

self.clobbered_blocks = RangeSet()

84

self.extended = RangeSet()

85

self.total_blocks = 0

86

self.file_map = {}

87

88

def RangeSha1(self, ranges):

89

return sha1().hexdigest()

90

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

91

def ReadRangeSet(self, ranges):

92

return ()

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

93

Tao Bao

68658c0

2015-06-01 13:40:49 -0700

[diff] [blame]

94

def TotalSha1(self, include_clobbered_blocks=False):

95

# EmptyImage always carries empty clobbered_blocks, so

96

# include_clobbered_blocks can be ignored.

97

assert self.clobbered_blocks.size() == 0

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

98

return sha1().hexdigest()

99

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

100

def WriteRangeDataToFd(self, ranges, fd):

101

raise ValueError("Can't write data from EmptyImage to file")

102

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

103

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

104

class DataImage(Image):

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

105

"""An image wrapped around a single string of data."""

106

107

def __init__(self, data, trim=False, pad=False):

108

self.data = data

109

self.blocksize = 4096

110

111

assert not (trim and pad)

112

113

partial = len(self.data) % self.blocksize

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

114

padded = False

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

115

if partial > 0:

116

if trim:

117

self.data = self.data[:-partial]

118

elif pad:

119

self.data += '\0' * (self.blocksize - partial)

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

120

padded = True

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

121

else:

122

raise ValueError(("data for DataImage must be multiple of %d bytes "

123

"unless trim or pad is specified") %

124

(self.blocksize,))

125

126

assert len(self.data) % self.blocksize == 0

127

128

self.total_blocks = len(self.data) / self.blocksize

129

self.care_map = RangeSet(data=(0, self.total_blocks))

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

130

# When the last block is padded, we always write the whole block even for

131

# incremental OTAs. Because otherwise the last block may get skipped if

132

# unchanged for an incremental, but would fail the post-install

133

# verification if it has non-zero contents in the padding bytes.

134

# Bug: 23828506

135

if padded:

Tao Bao

2015-09-08 13:39:40 -0700

[diff] [blame]

136

clobbered_blocks = [self.total_blocks-1, self.total_blocks]

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

137

else:

Tao Bao

2015-09-08 13:39:40 -0700

[diff] [blame]

138

clobbered_blocks = []

139

self.clobbered_blocks = clobbered_blocks

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

140

self.extended = RangeSet()

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

zero_blocks = []

nonzero_blocks = []

reference = '\0' * self.blocksize

145

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

146

for i in range(self.total_blocks-1 if padded else self.total_blocks):

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

147

d = self.data[i*self.blocksize : (i+1)*self.blocksize]

148

if d == reference:

149

zero_blocks.append(i)

150

zero_blocks.append(i+1)

151

else:

152

nonzero_blocks.append(i)

153

nonzero_blocks.append(i+1)

154

Tao Bao

2015-09-08 13:39:40 -0700

[diff] [blame]

155

assert zero_blocks or nonzero_blocks or clobbered_blocks

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

156

Tao Bao

2015-09-08 13:39:40 -0700

[diff] [blame]

157

self.file_map = dict()

158

if zero_blocks:

159

self.file_map["__ZERO"] = RangeSet(data=zero_blocks)

160

if nonzero_blocks:

161

self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)

162

if clobbered_blocks:

163

self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

164

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

165

def _GetRangeData(self, ranges):

166

for s, e in ranges:

167

yield self.data[s*self.blocksize:e*self.blocksize]

168

169

def RangeSha1(self, ranges):

170

h = sha1()

Tao Bao

76def24

2017-11-21 09:25:31 -0800

[diff] [blame]

171

for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

h.update(data)

return h.hexdigest()

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

175

def ReadRangeSet(self, ranges):

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

176

return [self._GetRangeData(ranges)]

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

177

Tao Bao

68658c0

2015-06-01 13:40:49 -0700

[diff] [blame]

178

def TotalSha1(self, include_clobbered_blocks=False):

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

179

if not include_clobbered_blocks:

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

180

return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

181

else:

182

return sha1(self.data).hexdigest()

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

183

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

184

def WriteRangeDataToFd(self, ranges, fd):

Tao Bao

76def24

2017-11-21 09:25:31 -0800

[diff] [blame]

185

for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

186

fd.write(data)

187

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

188

189

class Transfer(object):

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

190

def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,

191

src_sha1, style, by_id):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

192

self.tgt_name = tgt_name

193

self.src_name = src_name

194

self.tgt_ranges = tgt_ranges

195

self.src_ranges = src_ranges

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

196

self.tgt_sha1 = tgt_sha1

197

self.src_sha1 = src_sha1

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

198

self.style = style

Tao Bao

b8c8717

2015-03-19 19:42:12 -0700

[diff] [blame]

199

200

# We use OrderedDict rather than dict so that the output is repeatable;

201

# otherwise it would depend on the hash values of the Transfer objects.

202

self.goes_before = OrderedDict()

203

self.goes_after = OrderedDict()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

204

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

205

self.stash_before = []

206

self.use_stash = []

207

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

self.id = len(by_id)

by_id.append(self)

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

211

self._patch_info = None

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

212

213

@property

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

214

def patch_info(self):

215

return self._patch_info

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

216

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

217

@patch_info.setter

218

def patch_info(self, info):

219

if info:

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

220

assert self.style == "diff"

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

221

self._patch_info = info

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

222

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

223

def NetStashChange(self):

224

return (sum(sr.size() for (_, sr) in self.stash_before) -

225

sum(sr.size() for (_, sr) in self.use_stash))

226

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

227

def ConvertToNew(self):

228

assert self.style != "new"

229

self.use_stash = []

230

self.style = "new"

231

self.src_ranges = RangeSet()

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

232

self.patch_info = None

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

233

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

234

def __str__(self):

235

return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +

236

" to " + str(self.tgt_ranges) + ">")

237

238

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

239

@functools.total_ordering

240

class HeapItem(object):

241

def __init__(self, item):

242

self.item = item

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

243

# Negate the score since python's heap is a min-heap and we want the

244

# maximum score.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

245

self.score = -item.score

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

246

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

247

def clear(self):

248

self.item = None

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

249

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

250

def __bool__(self):

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

251

return self.item is not None

252

253

# Python 2 uses __nonzero__, while Python 3 uses __bool__.

254

__nonzero__ = __bool__

255

256

# The rest operations are generated by functools.total_ordering decorator.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

257

def __eq__(self, other):

258

return self.score == other.score

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

259

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

260

def __le__(self, other):

261

return self.score <= other.score

262

263

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

264

class ImgdiffStats(object):

265

"""A class that collects imgdiff stats.

266

267

It keeps track of the files that will be applied imgdiff while generating

268

BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific

269

reasons. The stats is only meaningful when imgdiff not being disabled by the

270

caller of BlockImageDiff. In addition, only files with supported types

271

(BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

272

"""

273

274

USED_IMGDIFF = "APK files diff'd with imgdiff"

275

USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

276

277

# Reasons for not applying imgdiff on APKs.

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

278

SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

279

SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

280

SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

281

282

# The list of valid reasons, which will also be the dumped order in a report.

283

REASONS = (

284

USED_IMGDIFF,

285

USED_IMGDIFF_LARGE_APK,

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

286

SKIPPED_NONMONOTONIC,

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

287

SKIPPED_SHARED_BLOCKS,

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

288

SKIPPED_INCOMPLETE,

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

)

def __init__(self):

self.stats = {}

def Log(self, filename, reason):

295

"""Logs why imgdiff can or cannot be applied to the given filename.

296

297

Args:

298

filename: The filename string.

299

reason: One of the reason constants listed in REASONS.

300

301

Raises:

302

AssertionError: On unsupported filetypes or invalid reason.

303

"""

304

assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)

305

assert reason in self.REASONS

306

307

if reason not in self.stats:

308

self.stats[reason] = set()

309

self.stats[reason].add(filename)

310

311

def Report(self):

312

"""Prints a report of the collected imgdiff stats."""

313

314

def print_header(header, separator):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

315

logger.info(header)

316

logger.info(separator * len(header) + '\n')

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

317

318

print_header(' Imgdiff Stats Report ', '=')

319

for key in self.REASONS:

320

if key not in self.stats:

321

continue

322

values = self.stats[key]

323

section_header = ' {} (count: {}) '.format(key, len(values))

324

print_header(section_header, '-')

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

325

logger.info(''.join([' {}\n'.format(name) for name in values]))

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

326

327

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

328

class BlockImageDiff(object):

Tao Bao

76def24

2017-11-21 09:25:31 -0800

[diff] [blame]

329

"""Generates the diff of two block image objects.

330

331

BlockImageDiff works on two image objects. An image object is anything that

332

provides the following attributes:

333

334

blocksize: the size in bytes of a block, currently must be 4096.

335

336

total_blocks: the total size of the partition/image, in blocks.

337

338

care_map: a RangeSet containing which blocks (in the range [0,

339

total_blocks) we actually care about; i.e. which blocks contain data.

340

341

file_map: a dict that partitions the blocks contained in care_map into

342

smaller domains that are useful for doing diffs on. (Typically a domain

343

is a file, and the key in file_map is the pathname.)

344

345

clobbered_blocks: a RangeSet containing which blocks contain data but may

346

be altered by the FS. They need to be excluded when verifying the

347

partition integrity.

348

349

ReadRangeSet(): a function that takes a RangeSet and returns the data

350

contained in the image blocks of that RangeSet. The data is returned as

351

a list or tuple of strings; concatenating the elements together should

352

produce the requested data. Implementations are free to break up the

353

data into list/tuple elements in any way that is convenient.

354

355

RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of

356

all the data in the specified range.

357

358

TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of

359

all the data in the image (ie, all the blocks in the care_map minus

360

clobbered_blocks, or including the clobbered blocks if

361

include_clobbered_blocks is True).

362

363

When creating a BlockImageDiff, the src image may be None, in which case the

364

list of transfers produced will never read from the original image.

365

"""

366

Tao Bao

293fd13

2016-06-11 12:19:23 -0700

[diff] [blame]

367

def __init__(self, tgt, src=None, threads=None, version=4,

368

disable_imgdiff=False):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

369

if threads is None:

370

threads = multiprocessing.cpu_count() // 2

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

371

if threads == 0:

372

threads = 1

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

373

self.threads = threads

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

374

self.version = version

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

375

self.transfers = []

376

self.src_basenames = {}

377

self.src_numpatterns = {}

Tao Bao

b4cfca5

2016-02-04 14:26:02 -0800

[diff] [blame]

378

self._max_stashed_size = 0

Tao Bao

d522bdc

2016-04-12 15:53:16 -0700

[diff] [blame]

379

self.touched_src_ranges = RangeSet()

380

self.touched_src_sha1 = None

Tao Bao

293fd13

2016-06-11 12:19:23 -0700

[diff] [blame]

381

self.disable_imgdiff = disable_imgdiff

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

382

self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

383

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

384

assert version in (3, 4)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

self.tgt = tgt

if src is None:

src = EmptyImage()

self.src = src

# The updater code that installs the patch always uses 4k blocks.

392

assert tgt.blocksize == 4096

393

assert src.blocksize == 4096

394

395

# The range sets in each filemap should comprise a partition of

396

# the care map.

397

self.AssertPartition(src.care_map, src.file_map.values())

398

self.AssertPartition(tgt.care_map, tgt.file_map.values())

399

Tao Bao

b4cfca5

2016-02-04 14:26:02 -0800

[diff] [blame]

400

@property

401

def max_stashed_size(self):

402

return self._max_stashed_size

403

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

404

@staticmethod

405

def FileTypeSupportedByImgdiff(filename):

406

"""Returns whether the file type is supported by imgdiff."""

407

return filename.lower().endswith(('.apk', '.jar', '.zip'))

408

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

409

def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

410

"""Checks whether we can apply imgdiff for the given RangeSets.

411

412

For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use

413

'imgdiff -z' if possible. Because it usually produces significantly smaller

414

patches than bsdiff.

415

416

This is permissible if all of the following conditions hold.

417

- The imgdiff hasn't been disabled by the caller (e.g. squashfs);

418

- The file type is supported by imgdiff;

419

- The source and target blocks are monotonic (i.e. the data is stored with

420

blocks in increasing order);

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

421

- Both files don't contain shared blocks;

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

422

- Both files have complete lists of blocks;

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

423

- We haven't removed any blocks from the source set.

424

425

If all these conditions are satisfied, concatenating all the blocks in the

426

RangeSet in order will produce a valid ZIP file (plus possibly extra zeros

427

in the last block). imgdiff is fine with extra zeros at the end of the file.

428

429

Args:

430

name: The filename to be diff'd.

431

tgt_ranges: The target RangeSet.

432

src_ranges: The source RangeSet.

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

433

large_apk: Whether this is to split a large APK.

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

Returns:

A boolean result.

"""

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

438

if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

439

return False

440

441

if not tgt_ranges.monotonic or not src_ranges.monotonic:

442

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)

443

return False

444

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

445

if (tgt_ranges.extra.get('uses_shared_blocks') or

446

src_ranges.extra.get('uses_shared_blocks')):

447

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)

448

return False

449

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

450

if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):

451

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)

452

return False

453

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

454

reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk

455

else ImgdiffStats.USED_IMGDIFF)

456

self.imgdiff_stats.Log(name, reason)

457

return True

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

458

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

459

def Compute(self, prefix):

460

# When looking for a source file to use as the diff input for a

461

# target file, we try:

462

# 1) an exact path match if available, otherwise

463

# 2) a exact basename match if available, otherwise

464

# 3) a basename match after all runs of digits are replaced by

465

# "#" if available, otherwise

466

# 4) we have no source for this target.

467

self.AbbreviateSourceNames()

468

self.FindTransfers()

469

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

470

self.FindSequenceForTransfers()

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

471

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

472

# Ensure the runtime stash size is under the limit.

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

473

if common.OPTIONS.cache_size is not None:

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

474

stash_limit = (common.OPTIONS.cache_size *

475

common.OPTIONS.stash_threshold / self.tgt.blocksize)

476

# Ignore the stash limit and calculate the maximum simultaneously stashed

477

# blocks needed.

478

_, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

479

480

# We cannot stash more blocks than the stash limit simultaneously. As a

481

# result, some 'diff' commands will be converted to new; leading to an

482

# unintended large package. To mitigate this issue, we can carefully

483

# choose the transfers for conversion. The number '1024' can be further

484

# tweaked here to balance the package size and build time.

485

if max_stashed_blocks > stash_limit + 1024:

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

486

self.SelectAndConvertDiffTransfersToNew(

487

max_stashed_blocks - stash_limit)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

488

# Regenerate the sequence as the graph has changed.

489

self.FindSequenceForTransfers()

490

491

# Revise the stash size again to keep the size under limit.

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

492

self.ReviseStashSize()

493

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

494

# Double-check our work.

495

self.AssertSequenceGood()

Tianjie Xu

8a7ed9f

2018-01-23 14:06:11 -0800

[diff] [blame]

496

self.AssertSha1Good()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

497

498

self.ComputePatches(prefix)

499

self.WriteTransfers(prefix)

500

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

501

# Report the imgdiff stats.

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

502

if not self.disable_imgdiff:

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

503

self.imgdiff_stats.Report()

504

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

505

def WriteTransfers(self, prefix):

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

506

def WriteSplitTransfers(out, style, target_blocks):

507

"""Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

508

509

This prevents the target size of one command from being too large; and

510

might help to avoid fsync errors on some devices."""

511

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

512

assert style == "new" or style == "zero"

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

513

blocks_limit = 1024

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

514

total = 0

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

515

while target_blocks:

516

blocks_to_write = target_blocks.first(blocks_limit)

517

out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))

518

total += blocks_to_write.size()

519

target_blocks = target_blocks.subtract(blocks_to_write)

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

520

return total

521

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

522

out = []

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

523

total = 0

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

524

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

525

# In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot

526

# id. 'stashes' records the map from 'hash' to the ref count. The stash

527

# will be freed only if the count decrements to zero.

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

528

stashes = {}

529

stashed_blocks = 0

530

max_stashed_blocks = 0

531

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

532

for xf in self.transfers:

533

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

534

for _, sr in xf.stash_before:

535

sh = self.src.RangeSha1(sr)

536

if sh in stashes:

537

stashes[sh] += 1

Sami Tolvanen

2014-12-09 16:40:34 +0000

[diff] [blame]

538

else:

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

539

stashes[sh] = 1

540

stashed_blocks += sr.size()

541

self.touched_src_ranges = self.touched_src_ranges.union(sr)

542

out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

543

544

if stashed_blocks > max_stashed_blocks:

545

max_stashed_blocks = stashed_blocks

546

Jesse Zhao

7b985f6

2015-03-02 16:53:08 -0800

[diff] [blame]

547

free_string = []

caozhiyuan

21b37d8

2015-10-21 15:14:03 +0800

[diff] [blame]

548

free_size = 0

Jesse Zhao

7b985f6

2015-03-02 16:53:08 -0800

[diff] [blame]

549

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

550

# <# blocks> <src ranges>

551

# OR

552

# <# blocks> <src ranges> <src locs> <stash refs...>

553

# OR

554

# <# blocks> - <stash refs...>

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

555

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

556

size = xf.src_ranges.size()

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

557

src_str_buffer = [str(size)]

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

558

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

559

unstashed_src_ranges = xf.src_ranges

560

mapped_stashes = []

561

for _, sr in xf.use_stash:

562

unstashed_src_ranges = unstashed_src_ranges.subtract(sr)

563

sh = self.src.RangeSha1(sr)

564

sr = xf.src_ranges.map_within(sr)

565

mapped_stashes.append(sr)

566

assert sh in stashes

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

567

src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

568

stashes[sh] -= 1

569

if stashes[sh] == 0:

570

free_string.append("free %s\n" % (sh,))

571

free_size += sr.size()

572

stashes.pop(sh)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

573

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

574

if unstashed_src_ranges:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

575

src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

576

if xf.use_stash:

577

mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

578

src_str_buffer.insert(2, mapped_unstashed.to_string_raw())

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

579

mapped_stashes.append(mapped_unstashed)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

580

self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

581

else:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

582

src_str_buffer.insert(1, "-")

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

583

self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

584

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

585

src_str = " ".join(src_str_buffer)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

586

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

587

# version 3+:

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

588

# zero <rangeset>

589

# new <rangeset>

590

# erase <rangeset>

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

591

# bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>

592

# imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>

593

# move hash <tgt rangeset> <src_str>

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

594

595

tgt_size = xf.tgt_ranges.size()

596

597

if xf.style == "new":

598

assert xf.tgt_ranges

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

599

assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

600

total += tgt_size

601

elif xf.style == "move":

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

602

assert xf.tgt_ranges

603

assert xf.src_ranges.size() == tgt_size

604

if xf.src_ranges != xf.tgt_ranges:

Sami Tolvanen

29f529f

2015-04-17 16:28:08 +0100

[diff] [blame]

605

# take into account automatic stashing of overlapping blocks

606

if xf.src_ranges.overlaps(xf.tgt_ranges):

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

607

temp_stash_usage = stashed_blocks + xf.src_ranges.size()

Sami Tolvanen

29f529f

2015-04-17 16:28:08 +0100

[diff] [blame]

608

if temp_stash_usage > max_stashed_blocks:

609

max_stashed_blocks = temp_stash_usage

610

Tao Bao

d522bdc

2016-04-12 15:53:16 -0700

[diff] [blame]

611

self.touched_src_ranges = self.touched_src_ranges.union(

612

xf.src_ranges)

613

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

614

out.append("%s %s %s %s\n" % (

Sami Tolvanen

2014-12-09 16:40:34 +0000

[diff] [blame]

615

xf.style,

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

616

xf.tgt_sha1,

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

617

xf.tgt_ranges.to_string_raw(), src_str))

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

618

total += tgt_size

619

elif xf.style in ("bsdiff", "imgdiff"):

620

assert xf.tgt_ranges

621

assert xf.src_ranges

622

# take into account automatic stashing of overlapping blocks

623

if xf.src_ranges.overlaps(xf.tgt_ranges):

624

temp_stash_usage = stashed_blocks + xf.src_ranges.size()

625

if temp_stash_usage > max_stashed_blocks:

626

max_stashed_blocks = temp_stash_usage

627

628

self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

629

630

out.append("%s %d %d %s %s %s %s\n" % (

631

xf.style,

632

xf.patch_start, xf.patch_len,

633

xf.src_sha1,

634

xf.tgt_sha1,

635

xf.tgt_ranges.to_string_raw(), src_str))

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

636

total += tgt_size

637

elif xf.style == "zero":

638

assert xf.tgt_ranges

639

to_zero = xf.tgt_ranges.subtract(xf.src_ranges)

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

640

assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

641

total += to_zero.size()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

642

else:

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

643

raise ValueError("unknown transfer style '%s'\n" % xf.style)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

644

Sami Tolvanen

2014-12-09 16:40:34 +0000

[diff] [blame]

645

if free_string:

646

out.append("".join(free_string))

caozhiyuan

21b37d8

2015-10-21 15:14:03 +0800

[diff] [blame]

647

stashed_blocks -= free_size

Sami Tolvanen

2014-12-09 16:40:34 +0000

[diff] [blame]

648

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

649

if common.OPTIONS.cache_size is not None:

Tao Bao

8dcf738

2015-05-21 14:09:49 -0700

[diff] [blame]

650

# Sanity check: abort if we're going to need more stash space than

651

# the allowed size (cache_size * threshold). There are two purposes

652

# of having a threshold here. a) Part of the cache may have been

653

# occupied by some recovery logs. b) It will buy us some time to deal

654

# with the oversize issue.

655

cache_size = common.OPTIONS.cache_size

656

stash_threshold = common.OPTIONS.stash_threshold

657

max_allowed = cache_size * stash_threshold

Tao Bao

e8c68a0

2017-02-26 10:48:11 -0800

[diff] [blame]

658

assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \

Tao Bao

8dcf738

2015-05-21 14:09:49 -0700

[diff] [blame]

659

'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (

660

max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,

661

self.tgt.blocksize, max_allowed, cache_size,

662

stash_threshold)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

663

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

664

self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

Tao Bao

d522bdc

2016-04-12 15:53:16 -0700

[diff] [blame]

665

Tianjie Xu

67c7cbb

2018-08-30 00:32:07 -0700

[diff] [blame]

666

if self.tgt.hashtree_info:

667

out.append("compute_hash_tree {} {} {} {} {}\n".format(

668

self.tgt.hashtree_info.hashtree_range.to_string_raw(),

669

self.tgt.hashtree_info.filesystem_range.to_string_raw(),

670

self.tgt.hashtree_info.hash_algorithm,

671

self.tgt.hashtree_info.salt,

672

self.tgt.hashtree_info.root_hash))

673

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

674

# Zero out extended blocks as a workaround for bug 20881595.

675

if self.tgt.extended:

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

676

assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

677

self.tgt.extended.size())

Tao Bao

b32d56e

2015-09-09 11:55:01 -0700

[diff] [blame]

678

total += self.tgt.extended.size()

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

679

680

# We erase all the blocks on the partition that a) don't contain useful

Tao Bao

66f1fa6

2016-05-03 10:02:01 -0700

[diff] [blame]

681

# data in the new image; b) will not be touched by dm-verity. Out of those

682

# blocks, we erase the ones that won't be used in this update at the

683

# beginning of an update. The rest would be erased at the end. This is to

684

# work around the eMMC issue observed on some devices, which may otherwise

685

# get starving for clean blocks and thus fail the update. (b/28347095)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

686

all_tgt = RangeSet(data=(0, self.tgt.total_blocks))

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

687

all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)

688

new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

Tao Bao

66f1fa6

2016-05-03 10:02:01 -0700

[diff] [blame]

689

690

erase_first = new_dontcare.subtract(self.touched_src_ranges)

691

if erase_first:

692

out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

693

694

erase_last = new_dontcare.subtract(erase_first)

695

if erase_last:

696

out.append("erase %s\n" % (erase_last.to_string_raw(),))

Doug Zongker

e985f6f

2014-09-09 12:38:47 -0700

[diff] [blame]

697

698

out.insert(0, "%d\n" % (self.version,)) # format version number

Tao Bao

b32d56e

2015-09-09 11:55:01 -0700

[diff] [blame]

699

out.insert(1, "%d\n" % (total,))

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

700

# v3+: the number of stash slots is unused.

701

out.insert(2, "0\n")

702

out.insert(3, str(max_stashed_blocks) + "\n")

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

703

704

with open(prefix + ".transfer.list", "wb") as f:

for i in out:

f.write(i)

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

708

self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize

709

OPTIONS = common.OPTIONS

710

if OPTIONS.cache_size is not None:

711

max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

712

logger.info(

713

"max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",

714

max_stashed_blocks, self._max_stashed_size, max_allowed,

715

self._max_stashed_size * 100.0 / max_allowed)

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

716

else:

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

717

logger.info(

718

"max stashed blocks: %d (%d bytes), limit: <unknown>\n",

719

max_stashed_blocks, self._max_stashed_size)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

720

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

721

def ReviseStashSize(self, ignore_stash_limit=False):

722

""" Revises the transfers to keep the stash size within the size limit.

723

724

Iterates through the transfer list and calculates the stash size each

725

transfer generates. Converts the affected transfers to new if we reach the

stash limit.

Args:

ignore_stash_limit: Ignores the stash limit and calculates the max

730

simultaneous stashed blocks instead. No change will be made to the

731

transfer list with this flag.

732

733

Return:

734

A tuple of (tgt blocks converted to new, max stashed blocks)

735

"""

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

736

logger.info("Revising stash size...")

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

737

stash_map = {}

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

738

739

# Create the map between a stash and its def/use points. For example, for a

Tao Bao

3c5a16d

2017-02-13 11:42:50 -0800

[diff] [blame]

740

# given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

741

for xf in self.transfers:

742

# Command xf defines (stores) all the stashes in stash_before.

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

743

for stash_raw_id, sr in xf.stash_before:

744

stash_map[stash_raw_id] = (sr, xf)

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

745

746

# Record all the stashes command xf uses.

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

747

for stash_raw_id, _ in xf.use_stash:

748

stash_map[stash_raw_id] += (xf,)

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

749

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

750

max_allowed_blocks = None

751

if not ignore_stash_limit:

752

# Compute the maximum blocks available for stash based on /cache size and

753

# the threshold.

754

cache_size = common.OPTIONS.cache_size

755

stash_threshold = common.OPTIONS.stash_threshold

756

max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

757

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

758

# See the comments for 'stashes' in WriteTransfers().

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

759

stashes = {}

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

760

stashed_blocks = 0

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

761

new_blocks = 0

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

762

max_stashed_blocks = 0

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

763

764

# Now go through all the commands. Compute the required stash size on the

765

# fly. If a command requires excess stash than available, it deletes the

766

# stash by replacing the command that uses the stash with a "new" command

767

# instead.

768

for xf in self.transfers:

769

replaced_cmds = []

770

771

# xf.stash_before generates explicit stash commands.

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

772

for stash_raw_id, sr in xf.stash_before:

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

773

# Check the post-command stashed_blocks.

774

stashed_blocks_after = stashed_blocks

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

775

sh = self.src.RangeSha1(sr)

776

if sh not in stashes:

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

777

stashed_blocks_after += sr.size()

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

778

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

779

if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

780

# We cannot stash this one for a later command. Find out the command

781

# that will use this stash and replace the command with "new".

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

782

use_cmd = stash_map[stash_raw_id][2]

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

783

replaced_cmds.append(use_cmd)

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

784

logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

785

else:

Tao Bao

3c5a16d

2017-02-13 11:42:50 -0800

[diff] [blame]

786

# Update the stashes map.

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

787

if sh in stashes:

788

stashes[sh] += 1

Tao Bao

3c5a16d

2017-02-13 11:42:50 -0800

[diff] [blame]

789

else:

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

790

stashes[sh] = 1

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

791

stashed_blocks = stashed_blocks_after

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

792

max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

793

794

# "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to

795

# ComputePatches(), they both have the style of "diff".

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

796

if xf.style == "diff":

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

797

assert xf.tgt_ranges and xf.src_ranges

798

if xf.src_ranges.overlaps(xf.tgt_ranges):

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

799

if (max_allowed_blocks and

800

stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

801

replaced_cmds.append(xf)

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

802

logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

803

else:

804

# The whole source ranges will be stashed for implicit stashes.

805

max_stashed_blocks = max(max_stashed_blocks,

806

stashed_blocks + xf.src_ranges.size())

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

807

808

# Replace the commands in replaced_cmds with "new"s.

809

for cmd in replaced_cmds:

810

# It no longer uses any commands in "use_stash". Remove the def points

811

# for all those stashes.

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

812

for stash_raw_id, sr in cmd.use_stash:

813

def_cmd = stash_map[stash_raw_id][1]

814

assert (stash_raw_id, sr) in def_cmd.stash_before

815

def_cmd.stash_before.remove((stash_raw_id, sr))

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

816

Tianjie Xu

ebe39a0

2016-01-14 14:12:26 -0800

[diff] [blame]

817

# Add up blocks that violates space limit and print total number to

818

# screen later.

819

new_blocks += cmd.tgt_ranges.size()

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

820

cmd.ConvertToNew()

821

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

822

# xf.use_stash may generate free commands.

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

823

for _, sr in xf.use_stash:

824

sh = self.src.RangeSha1(sr)

825

assert sh in stashes

826

stashes[sh] -= 1

827

if stashes[sh] == 0:

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

828

stashed_blocks -= sr.size()

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

829

stashes.pop(sh)

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

830

Tianjie Xu

ebe39a0

2016-01-14 14:12:26 -0800

[diff] [blame]

831

num_of_bytes = new_blocks * self.tgt.blocksize

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

832

logger.info(

833

" Total %d blocks (%d bytes) are packed as new blocks due to "

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

834

"insufficient cache size. Maximum blocks stashed simultaneously: %d",

835

new_blocks, num_of_bytes, max_stashed_blocks)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

836

return new_blocks, max_stashed_blocks

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

837

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

838

def ComputePatches(self, prefix):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

839

logger.info("Reticulating splines...")

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

840

diff_queue = []

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

841

patch_num = 0

842

with open(prefix + ".new.dat", "wb") as new_f:

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

843

for index, xf in enumerate(self.transfers):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

844

if xf.style == "zero":

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

845

tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

846

logger.info(

847

"%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,

848

xf.style, xf.tgt_name, str(xf.tgt_ranges))

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

849

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

850

elif xf.style == "new":

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

851

self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

852

tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

853

logger.info(

854

"%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,

855

xf.style, xf.tgt_name, str(xf.tgt_ranges))

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

856

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

857

elif xf.style == "diff":

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

858

# We can't compare src and tgt directly because they may have

859

# the same content but be broken up into blocks differently, eg:

860

#

861

# ["he", "llo"] vs ["h", "ello"]

862

#

863

# We want those to compare equal, ideally without having to

864

# actually concatenate the strings (these may be tens of

865

# megabytes).

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

866

if xf.src_sha1 == xf.tgt_sha1:

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

867

# These are identical; we don't need to generate a patch,

868

# just issue copy commands on the device.

869

xf.style = "move"

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

870

xf.patch_info = None

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

871

tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

872

if xf.src_ranges != xf.tgt_ranges:

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

873

logger.info(

874

"%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,

875

100.0, xf.style,

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

876

xf.tgt_name if xf.tgt_name == xf.src_name else (

877

xf.tgt_name + " (from " + xf.src_name + ")"),

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

878

str(xf.tgt_ranges), str(xf.src_ranges))

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

879

else:

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

880

if xf.patch_info:

881

# We have already generated the patch (e.g. during split of large

882

# APKs or reduction of stash size)

883

imgdiff = xf.patch_info.imgdiff

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

884

else:

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

885

imgdiff = self.CanUseImgdiff(

886

xf.tgt_name, xf.tgt_ranges, xf.src_ranges)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

887

xf.style = "imgdiff" if imgdiff else "bsdiff"

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

888

diff_queue.append((index, imgdiff, patch_num))

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

patch_num += 1

else:

assert False, "unknown style " + xf.style

893

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

894

patches = self.ComputePatchesForInputList(diff_queue, False)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

895

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

896

offset = 0

897

with open(prefix + ".patch.dat", "wb") as patch_fd:

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

898

for index, patch_info, _ in patches:

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

899

xf = self.transfers[index]

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

900

xf.patch_len = len(patch_info.content)

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

901

xf.patch_start = offset

902

offset += xf.patch_len

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

903

patch_fd.write(patch_info.content)

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

904

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

905

tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize

906

logger.info(

907

"%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,

908

xf.patch_len * 100.0 / tgt_size, xf.style,

909

xf.tgt_name if xf.tgt_name == xf.src_name else (

910

xf.tgt_name + " (from " + xf.src_name + ")"),

911

xf.tgt_ranges, xf.src_ranges)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

912

Tianjie Xu

8a7ed9f

2018-01-23 14:06:11 -0800

[diff] [blame]

913

def AssertSha1Good(self):

914

"""Check the SHA-1 of the src & tgt blocks in the transfer list.

915

916

Double check the SHA-1 value to avoid the issue in b/71908713, where

917

SparseImage.RangeSha1() messed up with the hash calculation in multi-thread

918

environment. That specific problem has been fixed by protecting the

919

underlying generator function 'SparseImage._GetRangeData()' with lock.

920

"""

921

for xf in self.transfers:

922

tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)

923

assert xf.tgt_sha1 == tgt_sha1

924

if xf.style == "diff":

925

src_sha1 = self.src.RangeSha1(xf.src_ranges)

926

assert xf.src_sha1 == src_sha1

927

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

928

def AssertSequenceGood(self):

929

# Simulate the sequences of transfers we will output, and check that:

930

# - we never read a block after writing it, and

931

# - we write every block we care about exactly once.

932

933

# Start with no blocks having been touched yet.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

934

touched = array.array("B", "\0" * self.tgt.total_blocks)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

935

936

# Imagine processing the transfers in order.

937

for xf in self.transfers:

938

# Check that the input blocks for this transfer haven't yet been touched.

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

939

940

x = xf.src_ranges

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

941

for _, sr in xf.use_stash:

942

x = x.subtract(sr)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

943

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

944

for s, e in x:

Tao Bao

ff75c23

2016-03-04 15:23:34 -0800

[diff] [blame]

945

# Source image could be larger. Don't check the blocks that are in the

946

# source image only. Since they are not in 'touched', and won't ever

947

# be touched.

948

for i in range(s, min(e, self.tgt.total_blocks)):

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

949

assert touched[i] == 0

950

951

# Check that the output blocks for this transfer haven't yet

952

# been touched, and touch all the blocks written by this

953

# transfer.

954

for s, e in xf.tgt_ranges:

955

for i in range(s, e):

956

assert touched[i] == 0

957

touched[i] = 1

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

958

Tianjie Xu

67c7cbb

2018-08-30 00:32:07 -0700

[diff] [blame]

959

if self.tgt.hashtree_info:

960

for s, e in self.tgt.hashtree_info.hashtree_range:

961

for i in range(s, e):

962

assert touched[i] == 0

963

touched[i] = 1

964

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

965

# Check that we've written every target block.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

966

for s, e in self.tgt.care_map:

967

for i in range(s, e):

968

assert touched[i] == 1

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

969

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

970

def FindSequenceForTransfers(self):

971

"""Finds a sequence for the given transfers.

972

973

The goal is to minimize the violation of order dependencies between these

974

transfers, so that fewer blocks are stashed when applying the update.

975

"""

976

977

# Clear the existing dependency between transfers

978

for xf in self.transfers:

979

xf.goes_before = OrderedDict()

980

xf.goes_after = OrderedDict()

xf.stash_before = []

xf.use_stash = []

# Find the ordering dependencies among transfers (this is O(n^2)

986

# in the number of transfers).

987

self.GenerateDigraph()

988

# Find a sequence of transfers that satisfies as many ordering

989

# dependencies as possible (heuristically).

990

self.FindVertexSequence()

991

# Fix up the ordering dependencies that the sequence didn't

992

# satisfy.

993

self.ReverseBackwardEdges()

994

self.ImproveVertexSequence()

995

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

996

def ImproveVertexSequence(self):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

997

logger.info("Improving vertex order...")

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

998

999

# At this point our digraph is acyclic; we reversed any edges that

1000

# were backwards in the heuristically-generated sequence. The

1001

# previously-generated order is still acceptable, but we hope to

1002

# find a better order that needs less memory for stashed data.

1003

# Now we do a topological sort to generate a new vertex order,

1004

# using a greedy algorithm to choose which vertex goes next

1005

# whenever we have a choice.

1006

1007

# Make a copy of the edge set; this copy will get destroyed by the

1008

# algorithm.

1009

for xf in self.transfers:

1010

xf.incoming = xf.goes_after.copy()

1011

xf.outgoing = xf.goes_before.copy()

1012

1013

L = [] # the new vertex order

1014

1015

# S is the set of sources in the remaining graph; we always choose

1016

# the one that leaves the least amount of stashed data after it's

1017

# executed.

1018

S = [(u.NetStashChange(), u.order, u) for u in self.transfers

if not u.incoming]

heapq.heapify(S)

while S:

_, _, xf = heapq.heappop(S)

1024

L.append(xf)

1025

for u in xf.outgoing:

1026

del u.incoming[xf]

1027

if not u.incoming:

1028

heapq.heappush(S, (u.NetStashChange(), u.order, u))

1029

1030

# if this fails then our graph had a cycle.

1031

assert len(L) == len(self.transfers)

1032

1033

self.transfers = L

1034

for i, xf in enumerate(L):

1035

xf.order = i

1036

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1037

def ReverseBackwardEdges(self):

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

1038

"""Reverse unsatisfying edges and compute pairs of stashed blocks.

1039

1040

For each transfer, make sure it properly stashes the blocks it touches and

1041

will be used by later transfers. It uses pairs of (stash_raw_id, range) to

1042

record the blocks to be stashed. 'stash_raw_id' is an id that uniquely

1043

identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),

1044

it is possible to have multiple pairs with different 'stash_raw_id's. Each

1045

'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical

1046

blocks will be written to the same stash slot in WriteTransfers().

1047

"""

1048

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1049

logger.info("Reversing backward edges...")

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1050

in_order = 0

1051

out_of_order = 0

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

1052

stash_raw_id = 0

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1053

stash_size = 0

1054

1055

for xf in self.transfers:

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1056

for u in xf.goes_before.copy():

1057

# xf should go before u

1058

if xf.order < u.order:

# it does, hurray!

in_order += 1

else:

# it doesn't, boo. modify u to stash the blocks that it

1063

# writes that xf wants to read, and then require u to go

# before xf.

out_of_order += 1

overlap = xf.src_ranges.intersect(u.tgt_ranges)

1068

assert overlap

1069

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

1070

u.stash_before.append((stash_raw_id, overlap))

1071

xf.use_stash.append((stash_raw_id, overlap))

1072

stash_raw_id += 1

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1073

stash_size += overlap.size()

1074

1075

# reverse the edge direction; now xf must go after u

1076

del xf.goes_before[u]

1077

del u.goes_after[xf]

1078

xf.goes_after[u] = None # value doesn't matter

1079

u.goes_before[xf] = None

1080

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1081

logger.info(

1082

" %d/%d dependencies (%.2f%%) were violated; %d source blocks "

1083

"stashed.", out_of_order, in_order + out_of_order,

1084

(out_of_order * 100.0 / (in_order + out_of_order)) if (

1085

in_order + out_of_order) else 0.0,

1086

stash_size)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1087

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1088

def FindVertexSequence(self):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1089

logger.info("Finding vertex sequence...")

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1090

1091

# This is based on "A Fast & Effective Heuristic for the Feedback

1092

# Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of

1093

# it as starting with the digraph G and moving all the vertices to

1094

# be on a horizontal line in some order, trying to minimize the

1095

# number of edges that end up pointing to the left. Left-pointing

1096

# edges will get removed to turn the digraph into a DAG. In this

1097

# case each edge has a weight which is the number of source blocks

1098

# we'll lose if that edge is removed; we try to minimize the total

1099

# weight rather than just the number of edges.

1100

1101

# Make a copy of the edge set; this copy will get destroyed by the

1102

# algorithm.

1103

for xf in self.transfers:

1104

xf.incoming = xf.goes_after.copy()

1105

xf.outgoing = xf.goes_before.copy()

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1106

xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1107

1108

# We use an OrderedDict instead of just a set so that the output

1109

# is repeatable; otherwise it would depend on the hash values of

1110

# the transfer objects.

1111

G = OrderedDict()

1112

for xf in self.transfers:

1113

G[xf] = None

1114

s1 = deque() # the left side of the sequence, built from left to right

1115

s2 = deque() # the right side of the sequence, built from right to left

1116

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1117

heap = []

1118

for xf in self.transfers:

1119

xf.heap_item = HeapItem(xf)

1120

heap.append(xf.heap_item)

1121

heapq.heapify(heap)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1122

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1123

# Use OrderedDict() instead of set() to preserve the insertion order. Need

1124

# to use 'sinks[key] = None' to add key into the set. sinks will look like

1125

# { key1: None, key2: None, ... }.

1126

sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)

1127

sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1128

1129

def adjust_score(iu, delta):

1130

iu.score += delta

1131

iu.heap_item.clear()

1132

iu.heap_item = HeapItem(iu)

1133

heapq.heappush(heap, iu.heap_item)

1134

1135

while G:

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1136

# Put all sinks at the end of the sequence.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1137

while sinks:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1138

new_sinks = OrderedDict()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1139

for u in sinks:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1140

if u not in G:

1141

continue

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1142

s2.appendleft(u)

1143

del G[u]

1144

for iu in u.incoming:

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1145

adjust_score(iu, -iu.outgoing.pop(u))

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1146

if not iu.outgoing:

1147

new_sinks[iu] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1148

sinks = new_sinks

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1149

1150

# Put all the sources at the beginning of the sequence.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1151

while sources:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1152

new_sources = OrderedDict()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1153

for u in sources:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1154

if u not in G:

1155

continue

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1156

s1.append(u)

1157

del G[u]

1158

for iu in u.outgoing:

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1159

adjust_score(iu, +iu.incoming.pop(u))

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1160

if not iu.incoming:

1161

new_sources[iu] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1162

sources = new_sources

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1163

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1164

if not G:

1165

break

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1166

1167

# Find the "best" vertex to put next. "Best" is the one that

1168

# maximizes the net difference in source blocks saved we get by

1169

# pretending it's a source rather than a sink.

1170

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1171

while True:

1172

u = heapq.heappop(heap)

1173

if u and u.item in G:

1174

u = u.item

1175

break

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1176

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1177

s1.append(u)

1178

del G[u]

1179

for iu in u.outgoing:

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1180

adjust_score(iu, +iu.incoming.pop(u))

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1181

if not iu.incoming:

1182

sources[iu] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1183

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1184

for iu in u.incoming:

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1185

adjust_score(iu, -iu.outgoing.pop(u))

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1186

if not iu.outgoing:

1187

sinks[iu] = None

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1188

1189

# Now record the sequence in the 'order' field of each transfer,

1190

# and by rearranging self.transfers to be in the chosen sequence.

1191

1192

new_transfers = []

1193

for x in itertools.chain(s1, s2):

1194

x.order = len(new_transfers)

1195

new_transfers.append(x)

del x.incoming

del x.outgoing

self.transfers = new_transfers

1200

1201

def GenerateDigraph(self):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1202

logger.info("Generating digraph...")

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1203

1204

# Each item of source_ranges will be:

1205

# - None, if that block is not used as a source,

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1206

# - an ordered set of transfers.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1207

source_ranges = []

1208

for b in self.transfers:

1209

for s, e in b.src_ranges:

1210

if e > len(source_ranges):

1211

source_ranges.extend([None] * (e-len(source_ranges)))

1212

for i in range(s, e):

1213

if source_ranges[i] is None:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1214

source_ranges[i] = OrderedDict.fromkeys([b])

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1215

else:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1216

source_ranges[i][b] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1217

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1218

for a in self.transfers:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1219

intersections = OrderedDict()

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1220

for s, e in a.tgt_ranges:

1221

for i in range(s, e):

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1222

if i >= len(source_ranges):

1223

break

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1224

# Add all the Transfers in source_ranges[i] to the (ordered) set.

1225

if source_ranges[i] is not None:

1226

for j in source_ranges[i]:

1227

intersections[j] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1228

1229

for b in intersections:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1230

if a is b:

1231

continue

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1232

1233

# If the blocks written by A are read by B, then B needs to go before A.

1234

i = a.tgt_ranges.intersect(b.src_ranges)

1235

if i:

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

1236

if b.src_name == "__ZERO":

1237

# the cost of removing source blocks for the __ZERO domain

# is (nearly) zero.

size = 0

else:

size = i.size()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1242

b.goes_before[a] = size

1243

a.goes_after[b] = size

1244

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

1245

def ComputePatchesForInputList(self, diff_queue, compress_target):

1246

"""Returns a list of patch information for the input list of transfers.

1247

1248

Args:

1249

diff_queue: a list of transfers with style 'diff'

1250

compress_target: If True, compresses the target ranges of each

1251

transfers; and save the size.

1252

1253

Returns:

1254

A list of (transfer order, patch_info, compressed_size) tuples.

"""

if not diff_queue:

return []

if self.threads > 1:

logger.info("Computing patches (using %d threads)...", self.threads)

1262

else:

1263

logger.info("Computing patches...")

1264

1265

diff_total = len(diff_queue)

1266

patches = [None] * diff_total

1267

error_messages = []

1268

1269

# Using multiprocessing doesn't give additional benefits, due to the

1270

# pattern of the code. The diffing work is done by subprocess.call, which

1271

# already runs in a separate process (not affected much by the GIL -

1272

# Global Interpreter Lock). Using multiprocess also requires either a)

1273

# writing the diff input files in the main process before forking, or b)

1274

# reopening the image file (SparseImage) in the worker processes. Doing

1275

# neither of them further improves the performance.

1276

lock = threading.Lock()

def diff_worker():

while True:

with lock:

if not diff_queue:

return

xf_index, imgdiff, patch_index = diff_queue.pop()

1284

xf = self.transfers[xf_index]

1285

1286

message = []

1287

compressed_size = None

1288

1289

patch_info = xf.patch_info

1290

if not patch_info:

1291

src_file = common.MakeTempFile(prefix="src-")

1292

with open(src_file, "wb") as fd:

1293

self.src.WriteRangeDataToFd(xf.src_ranges, fd)

1294

1295

tgt_file = common.MakeTempFile(prefix="tgt-")

1296

with open(tgt_file, "wb") as fd:

1297

self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)

1298

1299

try:

1300

patch_info = compute_patch(src_file, tgt_file, imgdiff)

1301

except ValueError as e:

1302

message.append(

1303

"Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (

1304

"imgdiff" if imgdiff else "bsdiff",

1305

xf.tgt_name if xf.tgt_name == xf.src_name else

1306

xf.tgt_name + " (from " + xf.src_name + ")",

1307

xf.tgt_ranges, xf.src_ranges, e.message))

1308

1309

if compress_target:

1310

tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)

1311

try:

1312

# Compresses with the default level

1313

compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)

1314

compressed_data = (compress_obj.compress("".join(tgt_data))

1315

+ compress_obj.flush())

1316

compressed_size = len(compressed_data)

1317

except zlib.error as e:

1318

message.append(

1319

"Failed to compress the data in target range {} for {}:\n"

1320

"{}".format(xf.tgt_ranges, xf.tgt_name, e.message))

if message:

with lock:

error_messages.extend(message)

1325

1326

with lock:

1327

patches[patch_index] = (xf_index, patch_info, compressed_size)

1328

1329

threads = [threading.Thread(target=diff_worker)

1330

for _ in range(self.threads)]

for th in threads:

th.start()

while threads:

threads.pop().join()

if error_messages:

logger.error('ERROR:')

1338

logger.error('\n'.join(error_messages))

1339

logger.error('\n\n\n')

sys.exit(1)

return patches

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

1344

def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1345

"""Converts the diff transfers to reduce the max simultaneous stash.

1346

1347

Since the 'new' data is compressed with deflate, we can select the 'diff'

1348

transfers for conversion by comparing its patch size with the size of the

1349

compressed data. Ideally, we want to convert the transfers with a small

1350

size increase, but using a large number of stashed blocks.

1351

"""

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

1352

TransferSizeScore = namedtuple("TransferSizeScore",

1353

"xf, used_stash_blocks, score")

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1354

1355

logger.info("Selecting diff commands to convert to new.")

1356

diff_queue = []

1357

for xf in self.transfers:

1358

if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:

1359

use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,

1360

xf.src_ranges)

1361

diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))

1362

1363

# Remove the 'move' transfers, and compute the patch & compressed size

1364

# for the remaining.

1365

result = self.ComputePatchesForInputList(diff_queue, True)

1366

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

1367

conversion_candidates = []

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1368

for xf_index, patch_info, compressed_size in result:

1369

xf = self.transfers[xf_index]

1370

if not xf.patch_info:

1371

xf.patch_info = patch_info

1372

1373

size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size

1374

diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

1375

logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1376

" compression_size: %d, ratio %.2f%%", xf.tgt_name,

1377

xf.tgt_ranges.size(), diff_style,

1378

len(xf.patch_info.content), compressed_size, size_ratio)

1379

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

1380

used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1381

# Convert the transfer to new if the compressed size is smaller or equal.

1382

# We don't need to maintain the stash_before lists here because the

1383

# graph will be regenerated later.

1384

if len(xf.patch_info.content) >= compressed_size:

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

1385

# Add the transfer to the candidate list with negative score. And it

1386

# will be converted later.

1387

conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,

1388

-1))

1389

elif used_stash_blocks > 0:

1390

# This heuristic represents the size increase in the final package to

1391

# remove per unit of stashed data.

1392

score = ((compressed_size - len(xf.patch_info.content)) * 100.0

1393

/ used_stash_blocks)

1394

conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,

1395

score))

1396

# Transfers with lower score (i.e. less expensive to convert) will be

1397

# converted first.

1398

conversion_candidates.sort(key=lambda x: x.score)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1399

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame^]

1400

# TODO(xunchang), improve the logic to find the transfers to convert, e.g.

1401

# convert the ones that contribute to the max stash, run ReviseStashSize

1402

# multiple times etc.

1403

removed_stashed_blocks = 0

1404

for xf, used_stash_blocks, _ in conversion_candidates:

1405

logger.info("Converting %s to new", xf.tgt_name)

1406

xf.ConvertToNew()

1407

removed_stashed_blocks += used_stash_blocks

1408

# Experiments show that we will get a smaller package size if we remove

1409

# slightly more stashed blocks than the violated stash blocks.

1410

if removed_stashed_blocks >= violated_stash_blocks:

1411

break

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1412

1413

logger.info("Removed %d stashed blocks", removed_stashed_blocks)

1414

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1415

def FindTransfers(self):

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1416

"""Parse the file_map to generate all the transfers."""

1417

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1418

def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,

1419

src_ranges, style, by_id):

1420

"""Add one or multiple Transfer()s by splitting large files.

1421

1422

For BBOTA v3, we need to stash source blocks for resumable feature.

1423

However, with the growth of file size and the shrink of the cache

1424

partition source blocks are too large to be stashed. If a file occupies

1425

too many blocks, we split it into smaller pieces by getting multiple

1426

Transfer()s.

1427

1428

The downside is that after splitting, we may increase the package size

1429

since the split pieces don't align well. According to our experiments,

1430

1/8 of the cache size as the per-piece limit appears to be optimal.

1431

Compared to the fixed 1024-block limit, it reduces the overall package

1432

size by 30% for volantis, and 20% for angler and bullhead."""

1433

Tianjie Xu

bb86e1d

2016-01-13 16:14:10 -0800

[diff] [blame]

1434

pieces = 0

Tianjie Xu

bb86e1d

2016-01-13 16:14:10 -0800

[diff] [blame]

1435

while (tgt_ranges.size() > max_blocks_per_transfer and

1436

src_ranges.size() > max_blocks_per_transfer):

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1437

tgt_split_name = "%s-%d" % (tgt_name, pieces)

1438

src_split_name = "%s-%d" % (src_name, pieces)

Tianjie Xu

bb86e1d

2016-01-13 16:14:10 -0800

[diff] [blame]

1439

tgt_first = tgt_ranges.first(max_blocks_per_transfer)

1440

src_first = src_ranges.first(max_blocks_per_transfer)

1441

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

1442

Transfer(tgt_split_name, src_split_name, tgt_first, src_first,

1443

self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),

1444

style, by_id)

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1445

1446

tgt_ranges = tgt_ranges.subtract(tgt_first)

1447

src_ranges = src_ranges.subtract(src_first)

1448

pieces += 1

1449

1450

# Handle remaining blocks.

1451

if tgt_ranges.size() or src_ranges.size():

1452

# Must be both non-empty.

1453

assert tgt_ranges.size() and src_ranges.size()

1454

tgt_split_name = "%s-%d" % (tgt_name, pieces)

1455

src_split_name = "%s-%d" % (src_name, pieces)

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

1456

Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,

1457

self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),

1458

style, by_id)

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1459

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1460

def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,

1461

by_id):

1462

"""Find all the zip files and split the others with a fixed chunk size.

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1463

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1464

This function will construct a list of zip archives, which will later be

1465

split by imgdiff to reduce the final patch size. For the other files,

1466

we will plainly split them based on a fixed chunk size with the potential

1467

patch size penalty.

1468

"""

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1469

1470

assert style == "diff"

1471

1472

# Change nothing for small files.

1473

if (tgt_ranges.size() <= max_blocks_per_transfer and

1474

src_ranges.size() <= max_blocks_per_transfer):

1475

Transfer(tgt_name, src_name, tgt_ranges, src_ranges,

1476

self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),

style, by_id)

return

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

1480

# Split large APKs with imgdiff, if possible. We're intentionally checking

1481

# file types one more time (CanUseImgdiff() checks that as well), before

1482

# calling the costly RangeSha1()s.

1483

if (self.FileTypeSupportedByImgdiff(tgt_name) and

1484

self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

1485

if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1486

large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))

1487

return

1488

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1489

AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,

1490

src_ranges, style, by_id)

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1491

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1492

def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,

1493

split=False):

1494

"""Wrapper function for adding a Transfer()."""

1495

1496

# We specialize diff transfers only (which covers bsdiff/imgdiff/move);

1497

# otherwise add the Transfer() as is.

1498

if style != "diff" or not split:

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

1499

Transfer(tgt_name, src_name, tgt_ranges, src_ranges,

1500

self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),

1501

style, by_id)

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1502

return

1503

1504

# Handle .odex files specially to analyze the block-wise difference. If

1505

# most of the blocks are identical with only few changes (e.g. header),

1506

# we will patch the changed blocks only. This avoids stashing unchanged

1507

# blocks while patching. We limit the analysis to files without size

1508

# changes only. This is to avoid sacrificing the OTA generation cost too

1509

# much.

1510

if (tgt_name.split(".")[-1].lower() == 'odex' and

1511

tgt_ranges.size() == src_ranges.size()):

1512

1513

# 0.5 threshold can be further tuned. The tradeoff is: if only very

1514

# few blocks remain identical, we lose the opportunity to use imgdiff

1515

# that may have better compression ratio than bsdiff.

1516

crop_threshold = 0.5

1517

1518

tgt_skipped = RangeSet()

1519

src_skipped = RangeSet()

1520

tgt_size = tgt_ranges.size()

1521

tgt_changed = 0

1522

for src_block, tgt_block in zip(src_ranges.next_item(),

1523

tgt_ranges.next_item()):

1524

src_rs = RangeSet(str(src_block))

1525

tgt_rs = RangeSet(str(tgt_block))

1526

if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):

1527

tgt_skipped = tgt_skipped.union(tgt_rs)

1528

src_skipped = src_skipped.union(src_rs)

1529

else:

1530

tgt_changed += tgt_rs.size()

1531

1532

# Terminate early if no clear sign of benefits.

1533

if tgt_changed > tgt_size * crop_threshold:

1534

break

1535

1536

if tgt_changed < tgt_size * crop_threshold:

1537

assert tgt_changed + tgt_skipped.size() == tgt_size

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1538

logger.info(

1539

'%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,

1540

tgt_skipped.size() * 100.0 / tgt_size, tgt_name)

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1541

AddSplitTransfers(

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1542

"%s-skipped" % (tgt_name,),

1543

"%s-skipped" % (src_name,),

1544

tgt_skipped, src_skipped, style, by_id)

1545

1546

# Intentionally change the file extension to avoid being imgdiff'd as

1547

# the files are no longer in their original format.

1548

tgt_name = "%s-cropped" % (tgt_name,)

1549

src_name = "%s-cropped" % (src_name,)

1550

tgt_ranges = tgt_ranges.subtract(tgt_skipped)

1551

src_ranges = src_ranges.subtract(src_skipped)

1552

1553

# Possibly having no changed blocks.

if not tgt_ranges:

return

# Add the transfer(s).

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1558

AddSplitTransfers(

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1559

tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

1560

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1561

def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,

1562

split_info):

1563

"""Parse the split_info and return a list of info tuples.

1564

1565

Args:

1566

patch_size: total size of the patch file.

1567

tgt_ranges: Ranges of the target file within the original image.

1568

src_ranges: Ranges of the source file within the original image.

split_info format:

imgdiff version#

count of pieces

<patch_size_1> <tgt_size_1> <src_ranges_1>

1573

...

1574

<patch_size_n> <tgt_size_n> <src_ranges_n>

1575

1576

Returns:

1577

[patch_start, patch_len, split_tgt_ranges, split_src_ranges]

1578

"""

1579

1580

version = int(split_info[0])

1581

assert version == 2

1582

count = int(split_info[1])

1583

assert len(split_info) - 2 == count

split_info_list = []

patch_start = 0

tgt_remain = copy.deepcopy(tgt_ranges)

1588

# each line has the format <patch_size>, <tgt_size>, <src_ranges>

1589

for line in split_info[2:]:

1590

info = line.split()

1591

assert len(info) == 3

1592

patch_length = int(info[0])

1593

1594

split_tgt_size = int(info[1])

1595

assert split_tgt_size % 4096 == 0

1596

assert split_tgt_size / 4096 <= tgt_remain.size()

1597

split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)

1598

tgt_remain = tgt_remain.subtract(split_tgt_ranges)

1599

1600

# Find the split_src_ranges within the image file from its relative

1601

# position in file.

1602

split_src_indices = RangeSet.parse_raw(info[2])

1603

split_src_ranges = RangeSet()

1604

for r in split_src_indices:

1605

curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))

1606

assert not split_src_ranges.overlaps(curr_range)

1607

split_src_ranges = split_src_ranges.union(curr_range)

1608

1609

split_info_list.append((patch_start, patch_length,

1610

split_tgt_ranges, split_src_ranges))

1611

patch_start += patch_length

1612

1613

# Check that the sizes of all the split pieces add up to the final file

1614

# size for patch and target.

1615

assert tgt_remain.size() == 0

1616

assert patch_start == patch_size

1617

return split_info_list

1618

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1619

def SplitLargeApks():

1620

"""Split the large apks files.

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1621

1622

Example: Chrome.apk will be split into

1623

src-0: Chrome.apk-0, tgt-0: Chrome.apk-0

1624

src-1: Chrome.apk-1, tgt-1: Chrome.apk-1

1625

...

1626

1627

After the split, the target pieces are continuous and block aligned; and

1628

the source pieces are mutually exclusive. During the split, we also

1629

generate and save the image patch between src-X & tgt-X. This patch will

1630

be valid because the block ranges of src-X & tgt-X will always stay the

1631

same afterwards; but there's a chance we don't use the patch if we

1632

convert the "diff" command into "new" or "move" later.

"""

while True:

with transfer_lock:

if not large_apks:

return

tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)

1640

1641

src_file = common.MakeTempFile(prefix="src-")

1642

tgt_file = common.MakeTempFile(prefix="tgt-")

Tianjie Xu

df1166e

2018-01-27 17:35:41 -0800

[diff] [blame]

1643

with open(src_file, "wb") as src_fd:

1644

self.src.WriteRangeDataToFd(src_ranges, src_fd)

1645

with open(tgt_file, "wb") as tgt_fd:

1646

self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1647

1648

patch_file = common.MakeTempFile(prefix="patch-")

1649

patch_info_file = common.MakeTempFile(prefix="split_info-")

1650

cmd = ["imgdiff", "-z",

1651

"--block-limit={}".format(max_blocks_per_transfer),

1652

"--split-info=" + patch_info_file,

1653

src_file, tgt_file, patch_file]

Tao Bao

73dd4f4

2018-10-04 16:25:33 -0700

[diff] [blame]

1654

proc = common.Run(cmd)

1655

imgdiff_output, _ = proc.communicate()

1656

assert proc.returncode == 0, \

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

1657

"Failed to create imgdiff patch between {} and {}:\n{}".format(

1658

src_name, tgt_name, imgdiff_output)

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1659

1660

with open(patch_info_file) as patch_info:

1661

lines = patch_info.readlines()

1662

1663

patch_size_total = os.path.getsize(patch_file)

1664

split_info_list = ParseAndValidateSplitInfo(patch_size_total,

1665

tgt_ranges, src_ranges,

1666

lines)

1667

for index, (patch_start, patch_length, split_tgt_ranges,

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1668

split_src_ranges) in enumerate(split_info_list):

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1669

with open(patch_file) as f:

1670

f.seek(patch_start)

1671

patch_content = f.read(patch_length)

1672

1673

split_src_name = "{}-{}".format(src_name, index)

1674

split_tgt_name = "{}-{}".format(tgt_name, index)

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1675

split_large_apks.append((split_tgt_name,

split_src_name,

split_tgt_ranges,

split_src_ranges,

patch_content))

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1680

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1681

logger.info("Finding transfers...")

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1682

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1683

large_apks = []

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1684

split_large_apks = []

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1685

cache_size = common.OPTIONS.cache_size

1686

split_threshold = 0.125

1687

max_blocks_per_transfer = int(cache_size * split_threshold /

1688

self.tgt.blocksize)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1689

empty = RangeSet()

Tianjie Xu

20a86cd

2018-01-12 12:21:00 -0800

[diff] [blame]

1690

for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1691

if tgt_fn == "__ZERO":

1692

# the special "__ZERO" domain is all the blocks not contained

1693

# in any file and that are filled with zeros. We have a

1694

# special transfer style for zero blocks.

1695

src_ranges = self.src.file_map.get("__ZERO", empty)

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1696

AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,

1697

"zero", self.transfers)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1698

continue

1699

Tao Bao

ff77781

2015-05-12 11:42:31 -0700

[diff] [blame]

1700

elif tgt_fn == "__COPY":

1701

# "__COPY" domain includes all the blocks not contained in any

1702

# file and that need to be copied unconditionally to the target.

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1703

AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

Tao Bao

ff77781

2015-05-12 11:42:31 -0700

[diff] [blame]

1704

continue

1705

Tianjie Xu

67c7cbb

2018-08-30 00:32:07 -0700

[diff] [blame]

1706

elif tgt_fn == "__HASHTREE":

1707

continue

1708

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1709

elif tgt_fn in self.src.file_map:

1710

# Look for an exact pathname match in the source.

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1711

AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1712

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1713

continue

1714

1715

b = os.path.basename(tgt_fn)

1716

if b in self.src_basenames:

1717

# Look for an exact basename match in the source.

1718

src_fn = self.src_basenames[b]

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1719

AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1720

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1721

continue

1722

1723

b = re.sub("[0-9]+", "#", b)

1724

if b in self.src_numpatterns:

1725

# Look for a 'number pattern' match (a basename match after

1726

# all runs of digits are replaced by "#"). (This is useful

1727

# for .so files that contain version numbers in the filename

1728

# that get bumped.)

1729

src_fn = self.src_numpatterns[b]

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1730

AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1731

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1732

continue

1733

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1734

AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1735

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1736

transfer_lock = threading.Lock()

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1737

threads = [threading.Thread(target=SplitLargeApks)

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1738

for _ in range(self.threads)]

for th in threads:

th.start()

while threads:

threads.pop().join()

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1744

# Sort the split transfers for large apks to generate a determinate package.

1745

split_large_apks.sort()

1746

for (tgt_name, src_name, tgt_ranges, src_ranges,

1747

patch) in split_large_apks:

1748

transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,

1749

self.tgt.RangeSha1(tgt_ranges),

1750

self.src.RangeSha1(src_ranges),

1751

"diff", self.transfers)

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

1752

transfer_split.patch_info = PatchInfo(True, patch)

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1753

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1754

def AbbreviateSourceNames(self):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1755

for k in self.src.file_map.keys():

1756

b = os.path.basename(k)

1757

self.src_basenames[b] = k

1758

b = re.sub("[0-9]+", "#", b)

1759

self.src_numpatterns[b] = k

1760

1761

@staticmethod

1762

def AssertPartition(total, seq):

1763

"""Assert that all the RangeSets in 'seq' form a partition of the

1764

'total' RangeSet (ie, they are nonintersecting and their union

1765

equals 'total')."""

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1766

Doug Zongker