Blame - tools/releasetools/blockimgdiff.py - android_build

2014-08-26 13:10:25 -0700

[diff] [blame]

15

from __future__ import print_function

16

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

17

import array

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

18

import copy

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

19

import functools

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

20

import heapq

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

21

import itertools

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

22

import logging

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

23

import multiprocessing

24

import os

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

25

import os.path

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

26

import re

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

27

import sys

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

28

import threading

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

29

import zlib

30

from collections import deque, namedtuple, OrderedDict

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

31

from hashlib import sha1

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

32

33

import common

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

34

from rangelib import RangeSet

35

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

36

# Public names exported by this module.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])

42

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

43

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

44

def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo.

  Args:
    srcfile: Path of the source file passed to the diff tool.
    tgtfile: Path of the target file passed to the diff tool.
    imgdiff: If true, run 'imgdiff -z'; otherwise run 'bsdiff'.

  Returns:
    A PatchInfo whose 'imgdiff' field is the flag given by the caller and
    whose 'content' field is the bytes read back from the patch file.

  Raises:
    ValueError: If the diff tool exits with a non-zero return code. The
        output captured from the tool is used as the exception argument.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

61

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

62

63

class Image(object):
  """Base class listing the methods an image object is expected to provide.

  Every method here raises NotImplementedError; subclasses override them.
  """

  def RangeSha1(self, ranges):
    """Returns the SHA-1 hex digest of the data in the given ranges."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Returns the data contained in the given ranges."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the SHA-1 hex digest of the image data."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given ranges to the file object 'fd'."""
    raise NotImplementedError

75

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

76

77

class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}
    self.hashtree_info = None

  def RangeSha1(self, ranges):
    """Returns the SHA-1 hex digest of no data; 'ranges' is ignored."""
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns an empty tuple; 'ranges' is ignored."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the SHA-1 hex digest of no data."""
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Always raises ValueError; an EmptyImage has no data to write."""
    raise ValueError("Can't write data from EmptyImage to file")

103

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

104

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

105

class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wraps 'data' and sorts its blocks into zero / non-zero / copy groups.

    Args:
      data: The image contents. Its length must be a multiple of the block
          size (4096) unless trim or pad is given.
      trim: If true, a trailing partial block is dropped.
      pad: If true, a trailing partial block is zero-filled to a full block.
          Must not be combined with trim.

    Raises:
      ValueError: If the data length is not a multiple of the block size and
          neither trim nor pad is specified.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    # Build the zero filler with the same type as the data, so that padding
    # and the all-zero comparison below also work for bytes under Python 3.
    # Under Python 2, bytes is str, so this is the same '\0' as before.
    zero = b'\0' if isinstance(self.data, bytes) else '\0'

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += zero * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Floor division keeps the block count an int under Python 3 too; a float
    # here would break the range() call below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Store a RangeSet (as EmptyImage does), because TotalSha1() hands this
    # attribute to care_map.subtract().
    self.clobbered_blocks = (
        RangeSet(data=clobbered_blocks) if clobbered_blocks else RangeSet())
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = zero * self.blocksize

    # A padded last block is excluded here; it goes to "__COPY" instead.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      # Each block is recorded as the pair (i, i+1).
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yields the slice of self.data for each (start, end) pair in 'ranges'."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Returns the SHA-1 hex digest of the data in the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    """Returns a list with one data slice per range in 'ranges'."""
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    """Returns the SHA-1 hex digest of the image data.

    Args:
      include_clobbered_blocks: If true, hash all of self.data; otherwise
          hash only the care_map blocks minus clobbered_blocks.
    """
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    """Writes the data in the given ranges to the file object 'fd'."""
    for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable
      fd.write(data)

188

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

189

190

class Transfer(object):
  """One transfer, i.e. a target range set produced from a source range set.

  On construction the transfer takes the next free index in 'by_id' as its id
  and appends itself to that list.
  """

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    # Both lists hold pairs whose second element offers size(); see
    # NetStashChange().
    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    # Only "diff" transfers may carry patch info; clearing is always allowed.
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    """Returns size of stash_before ranges minus size of use_stash ranges."""
    return (sum(sr.size() for (_, sr) in self.stash_before) -
            sum(sr.size() for (_, sr) in self.use_stash))

  def ConvertToNew(self):
    """Turns this transfer into a "new" one with no source and no patch."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
            " to " + str(self.tgt_ranges) + ">")

238

239

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

240

@functools.total_ordering
class HeapItem(object):
  """Wraps an item with a 'score' attribute for use in a heap.

  Items compare by negated score. An entry can be emptied with clear(),
  after which it evaluates as false.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    """Drops the wrapped item; the stored score is left unchanged."""
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # The rest operations are generated by functools.total_ordering decorator.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score

263

264

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

265

class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files that will be applied imgdiff while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats is only meaningful when imgdiff not being disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    # Maps a reason string to the set of filenames logged with it.
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    self.stats.setdefault(reason, set()).add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      # Sort the names so the report is repeatable; set order is arbitrary.
      logger.info(''.join([' {}\n'.format(name) for name in sorted(values)]))

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

327

328

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

329

class BlockImageDiff(object):

Tao Bao

76def24

2017-11-21 09:25:31 -0800

[diff] [blame]

330

"""Generates the diff of two block image objects.

331

332

BlockImageDiff works on two image objects. An image object is anything that

333

provides the following attributes:

334

335

blocksize: the size in bytes of a block, currently must be 4096.

336

337

total_blocks: the total size of the partition/image, in blocks.

338

339

care_map: a RangeSet containing which blocks (in the range [0,

340

total_blocks) we actually care about; i.e. which blocks contain data.

341

342

file_map: a dict that partitions the blocks contained in care_map into

343

smaller domains that are useful for doing diffs on. (Typically a domain

344

is a file, and the key in file_map is the pathname.)

345

346

clobbered_blocks: a RangeSet containing which blocks contain data but may

347

be altered by the FS. They need to be excluded when verifying the

348

partition integrity.

349

350

ReadRangeSet(): a function that takes a RangeSet and returns the data

351

contained in the image blocks of that RangeSet. The data is returned as

352

a list or tuple of strings; concatenating the elements together should

353

produce the requested data. Implementations are free to break up the

354

data into list/tuple elements in any way that is convenient.

355

356

RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of

357

all the data in the specified range.

358

359

TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of

360

all the data in the image (ie, all the blocks in the care_map minus

361

clobbered_blocks, or including the clobbered blocks if

362

include_clobbered_blocks is True).

363

364

When creating a BlockImageDiff, the src image may be None, in which case the

365

list of transfers produced will never read from the original image.

366

"""

367

Tao Bao

293fd13

2016-06-11 12:19:23 -0700

[diff] [blame]

368

def __init__(self, tgt, src=None, threads=None, version=4,
             disable_imgdiff=False):
  """Sets up a diff between the target image and an optional source image.

  Args:
    tgt: The target image object.
    src: The source image object; an EmptyImage is used when None.
    threads: Stored as self.threads. When None, half of the CPU count is
        used, but at least 1.
    version: Stored as self.version; must be 3 or 4.
    disable_imgdiff: If true, imgdiff is not used and no ImgdiffStats is
        created.
  """
  if threads is None:
    threads = multiprocessing.cpu_count() // 2
    if threads == 0:
      threads = 1
  self.threads = threads
  self.version = version
  self.transfers = []
  self.src_basenames = {}
  self.src_numpatterns = {}
  self._max_stashed_size = 0
  self.touched_src_ranges = RangeSet()
  self.touched_src_sha1 = None
  self.disable_imgdiff = disable_imgdiff
  self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

  assert version in (3, 4)

  self.tgt = tgt
  if src is None:
    src = EmptyImage()
  self.src = src

  # The updater code that installs the patch always uses 4k blocks.
  assert tgt.blocksize == 4096
  assert src.blocksize == 4096

  # The range sets in each filemap should comprise a partition of
  # the care map.
  self.AssertPartition(src.care_map, src.file_map.values())
  self.AssertPartition(tgt.care_map, tgt.file_map.values())

400

Tao Bao

b4cfca5

2016-02-04 14:26:02 -0800

[diff] [blame]

401

@property

402

def max_stashed_size(self):

403

return self._max_stashed_size

404

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

405

@staticmethod

406

def FileTypeSupportedByImgdiff(filename):

407

"""Returns whether the file type is supported by imgdiff."""

408

return filename.lower().endswith(('.apk', '.jar', '.zip'))

409

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

410

def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

411

"""Checks whether we can apply imgdiff for the given RangeSets.

412

413

For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use

414

'imgdiff -z' if possible. Because it usually produces significantly smaller

415

patches than bsdiff.

416

417

This is permissible if all of the following conditions hold.

418

- The imgdiff hasn't been disabled by the caller (e.g. squashfs);

419

- The file type is supported by imgdiff;

420

- The source and target blocks are monotonic (i.e. the data is stored with

421

blocks in increasing order);

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

422

- Both files don't contain shared blocks;

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

423

- Both files have complete lists of blocks;

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

424

- We haven't removed any blocks from the source set.

425

426

If all these conditions are satisfied, concatenating all the blocks in the

427

RangeSet in order will produce a valid ZIP file (plus possibly extra zeros

428

in the last block). imgdiff is fine with extra zeros at the end of the file.

429

430

Args:

431

name: The filename to be diff'd.

432

tgt_ranges: The target RangeSet.

433

src_ranges: The source RangeSet.

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

434

large_apk: Whether this is to split a large APK.

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

Returns:

A boolean result.

"""

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

439

if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

440

return False

441

442

if not tgt_ranges.monotonic or not src_ranges.monotonic:

443

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)

444

return False

445

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

446

if (tgt_ranges.extra.get('uses_shared_blocks') or

447

src_ranges.extra.get('uses_shared_blocks')):

448

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)

449

return False

450

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

451

if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):

452

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)

453

return False

454

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

455

reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk

456

else ImgdiffStats.USED_IMGDIFF)

457

self.imgdiff_stats.Log(name, reason)

458

return True

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

459

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

460

def Compute(self, prefix):
  """Runs the whole diff pipeline and writes its output.

  Finds and orders the transfers, keeps the stash size within the configured
  limit, checks the result, then computes the patches and writes the
  transfers.

  Args:
    prefix: Passed through to ComputePatches() and WriteTransfers().
  """
  # When looking for a source file to use as the diff input for a
  # target file, we try:
  # 1) an exact path match if available, otherwise
  # 2) an exact basename match if available, otherwise
  # 3) a basename match after all runs of digits are replaced by
  # "#" if available, otherwise
  # 4) we have no source for this target.
  self.AbbreviateSourceNames()
  self.FindTransfers()

  self.FindSequenceForTransfers()

  # Ensure the runtime stash size is under the limit.
  if common.OPTIONS.cache_size is not None:
    stash_limit = (common.OPTIONS.cache_size *
                   common.OPTIONS.stash_threshold / self.tgt.blocksize)
    # Ignore the stash limit and calculate the maximum simultaneously stashed
    # blocks needed.
    _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

    # We cannot stash more blocks than the stash limit simultaneously. As a
    # result, some 'diff' commands will be converted to new; leading to an
    # unintended large package. To mitigate this issue, we can carefully
    # choose the transfers for conversion. The number '1024' can be further
    # tweaked here to balance the package size and build time.
    if max_stashed_blocks > stash_limit + 1024:
      self.SelectAndConvertDiffTransfersToNew(
          max_stashed_blocks - stash_limit)
      # Regenerate the sequence as the graph has changed.
      self.FindSequenceForTransfers()

    # Revise the stash size again to keep the size under limit.
    self.ReviseStashSize()

  # Double-check our work.
  self.AssertSequenceGood()
  self.AssertSha1Good()

  self.ComputePatches(prefix)
  self.WriteTransfers(prefix)

  # Report the imgdiff stats.
  if not self.disable_imgdiff:
    self.imgdiff_stats.Report()

505

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

506

def WriteTransfers(self, prefix):
  """Serializes self.transfers into the '<prefix>.transfer.list' file.

  Walks the transfers in order, emitting the explicit "stash" / "free"
  commands and one command line per transfer, while tracking the peak number
  of simultaneously stashed blocks. The resulting file starts with four
  header lines: the format version, the total number of blocks written, a
  literal "0" (unused stash slot count) and the peak stashed block count.

  Besides writing the file, this sets self.touched_src_ranges,
  self.touched_src_sha1 and self._max_stashed_size.

  Args:
    prefix: Path prefix of the output; ".transfer.list" is appended to it.

  Raises:
    ValueError: If a transfer has a style this method does not know.
    AssertionError: If an internal consistency check fails, including the
        stash size exceeding cache_size * stash_threshold when a cache size
        is configured.
  """

  def WriteSplitTransfers(out, style, target_blocks):
    """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

    This prevents the target size of one command from being too large; and
    might help to avoid fsync errors on some devices.

    Args:
      out: The list of command strings to append to.
      style: Either "new" or "zero".
      target_blocks: The range set to be covered by the emitted commands.

    Returns:
      The total number of blocks covered by the emitted commands.
    """
    assert style == "new" or style == "zero"
    blocks_limit = 1024
    total = 0
    while target_blocks:
      blocks_to_write = target_blocks.first(blocks_limit)
      out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
      total += blocks_to_write.size()
      target_blocks = target_blocks.subtract(blocks_to_write)
    return total

  out = []
  total = 0

  # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
  # id. 'stashes' records the map from 'hash' to the ref count. The stash
  # will be freed only if the count decrements to zero.
  stashes = {}
  stashed_blocks = 0
  max_stashed_blocks = 0

  for xf in self.transfers:

    # Emit one "stash" command per distinct hash; identical content that is
    # already stashed only bumps the ref count.
    for _, sr in xf.stash_before:
      sh = self.src.RangeSha1(sr)
      if sh in stashes:
        stashes[sh] += 1
      else:
        stashes[sh] = 1
        stashed_blocks += sr.size()
        self.touched_src_ranges = self.touched_src_ranges.union(sr)
        out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

    if stashed_blocks > max_stashed_blocks:
      max_stashed_blocks = stashed_blocks

    # "free" commands are collected here and emitted after the command that
    # consumes the stashes.
    free_string = []
    free_size = 0

    # The source description (src_str) takes one of these forms:
    #   <# blocks> <src ranges>
    #     OR
    #   <# blocks> <src ranges> <src locs> <stash refs...>
    #     OR
    #   <# blocks> - <stash refs...>

    size = xf.src_ranges.size()
    src_str_buffer = [str(size)]

    unstashed_src_ranges = xf.src_ranges
    mapped_stashes = []
    for _, sr in xf.use_stash:
      unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
      sh = self.src.RangeSha1(sr)
      # From here on 'sr' is the stash's position within xf.src_ranges.
      sr = xf.src_ranges.map_within(sr)
      mapped_stashes.append(sr)
      assert sh in stashes
      src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
      stashes[sh] -= 1
      if stashes[sh] == 0:
        free_string.append("free %s\n" % (sh,))
        free_size += sr.size()
        stashes.pop(sh)

    if unstashed_src_ranges:
      src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
      if xf.use_stash:
        mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
        src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
        mapped_stashes.append(mapped_unstashed)
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
    else:
      src_str_buffer.insert(1, "-")
      self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

    src_str = " ".join(src_str_buffer)

    # version 3+:
    #   zero <rangeset>
    #   new <rangeset>
    #   erase <rangeset>
    #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
    #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
    #   move hash <tgt rangeset> <src_str>

    tgt_size = xf.tgt_ranges.size()

    if xf.style == "new":
      assert xf.tgt_ranges
      # Emit the commands outside of the assert statement, so that they are
      # still written when assertions are stripped (python -O).
      written = WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
      assert tgt_size == written
      total += tgt_size
    elif xf.style == "move":
      assert xf.tgt_ranges
      assert xf.src_ranges.size() == tgt_size
      # A move onto the very same blocks needs no command at all.
      if xf.src_ranges != xf.tgt_ranges:
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(
            xf.src_ranges)

        out.append("%s %s %s %s\n" % (
            xf.style,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
    elif xf.style in ("bsdiff", "imgdiff"):
      assert xf.tgt_ranges
      assert xf.src_ranges
      # take into account automatic stashing of overlapping blocks
      if xf.src_ranges.overlaps(xf.tgt_ranges):
        temp_stash_usage = stashed_blocks + xf.src_ranges.size()
        if temp_stash_usage > max_stashed_blocks:
          max_stashed_blocks = temp_stash_usage

      self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

      out.append("%s %d %d %s %s %s %s\n" % (
          xf.style,
          xf.patch_start, xf.patch_len,
          xf.src_sha1,
          xf.tgt_sha1,
          xf.tgt_ranges.to_string_raw(), src_str))
      total += tgt_size
    elif xf.style == "zero":
      assert xf.tgt_ranges
      to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
      # See the note in the "new" branch: no side effects inside asserts.
      written = WriteSplitTransfers(out, xf.style, to_zero)
      assert written == to_zero.size()
      total += to_zero.size()
    else:
      raise ValueError("unknown transfer style '%s'\n" % xf.style)

    if free_string:
      out.append("".join(free_string))
      stashed_blocks -= free_size

    if common.OPTIONS.cache_size is not None:
      # Sanity check: abort if we're going to need more stash space than
      # the allowed size (cache_size * threshold). There are two purposes
      # of having a threshold here. a) Part of the cache may have been
      # occupied by some recovery logs. b) It will buy us some time to deal
      # with the oversize issue.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed = cache_size * stash_threshold
      assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, (
          'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
              max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
              self.tgt.blocksize, max_allowed, cache_size,
              stash_threshold))

  self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

  if self.tgt.hashtree_info:
    out.append("compute_hash_tree {} {} {} {} {}\n".format(
        self.tgt.hashtree_info.hashtree_range.to_string_raw(),
        self.tgt.hashtree_info.filesystem_range.to_string_raw(),
        self.tgt.hashtree_info.hash_algorithm,
        self.tgt.hashtree_info.salt,
        self.tgt.hashtree_info.root_hash))

  # Zero out extended blocks as a workaround for bug 20881595.
  if self.tgt.extended:
    written = WriteSplitTransfers(out, "zero", self.tgt.extended)
    assert written == self.tgt.extended.size()
    total += self.tgt.extended.size()

  # We erase all the blocks on the partition that a) don't contain useful
  # data in the new image; b) will not be touched by dm-verity. Out of those
  # blocks, we erase the ones that won't be used in this update at the
  # beginning of an update. The rest would be erased at the end. This is to
  # work around the eMMC issue observed on some devices, which may otherwise
  # get starving for clean blocks and thus fail the update. (b/28347095)
  all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
  all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
  new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

  erase_first = new_dontcare.subtract(self.touched_src_ranges)
  if erase_first:
    out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

  erase_last = new_dontcare.subtract(erase_first)
  if erase_last:
    out.append("erase %s\n" % (erase_last.to_string_raw(),))

  # The four header lines go in front of everything, including the leading
  # "erase" command inserted above.
  out.insert(0, "%d\n" % (self.version,))  # format version number
  out.insert(1, "%d\n" % (total,))
  # v3+: the number of stash slots is unused.
  out.insert(2, "0\n")
  out.insert(3, str(max_stashed_blocks) + "\n")

  # The file is opened in binary mode, so encode the text explicitly; writing
  # str objects directly raises TypeError on Python 3. All commands consist
  # of ASCII characters only.
  with open(prefix + ".transfer.list", "wb") as f:
    f.write("".join(out).encode("ascii"))

  self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
  OPTIONS = common.OPTIONS
  if OPTIONS.cache_size is not None:
    max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
    logger.info(
        "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
        max_stashed_blocks, self._max_stashed_size, max_allowed,
        self._max_stashed_size * 100.0 / max_allowed)
  else:
    logger.info(
        "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
        max_stashed_blocks, self._max_stashed_size)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

721

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

722

def ReviseStashSize(self, ignore_stash_limit=False):
  """Simulates the transfer list and keeps the stash usage under the limit.

  Replays the transfers in order while tracking how many blocks are stashed
  at each point. Whenever a stash (explicit or implicit) would push the usage
  beyond the allowed number of blocks, the command consuming that stash is
  converted to "new", so that the stash is no longer needed.

  Args:
    ignore_stash_limit: When True, no limit is applied and no transfer is
        converted; the call only measures the maximum number of blocks that
        are stashed simultaneously.

  Returns:
    A tuple of (tgt blocks converted to new, max stashed blocks).
  """
  logger.info("Revising stash size...")

  # Map every stash id to its range plus its def/use points:
  # stash_map[raw_id] == (sr, def_cmd, use_cmd).
  stash_map = {}
  for transfer in self.transfers:
    # 'transfer' defines (stores) all the stashes in its stash_before.
    for raw_id, stash_range in transfer.stash_before:
      stash_map[raw_id] = (stash_range, transfer)

    # ... and is the use point of all the stashes in its use_stash.
    for raw_id, _ in transfer.use_stash:
      stash_map[raw_id] += (transfer,)

  if ignore_stash_limit:
    max_allowed_blocks = None
  else:
    # The number of blocks available for stashing, derived from the /cache
    # size and the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

  # Hash of the stashed blocks -> ref count; see the comments for 'stashes'
  # in WriteTransfers().
  ref_counts = {}
  stashed_blocks = 0
  new_blocks = 0
  max_stashed_blocks = 0

  # Replay all the commands, computing the required stash size on the fly.
  for transfer in self.transfers:
    to_convert = []

    # Explicit stashes: transfer.stash_before generates "stash" commands.
    for raw_id, stash_range in transfer.stash_before:
      digest = self.src.RangeSha1(stash_range)

      # Stashing content that is already stashed costs no extra blocks.
      if digest in ref_counts:
        blocks_after = stashed_blocks
      else:
        blocks_after = stashed_blocks + stash_range.size()

      if max_allowed_blocks and blocks_after > max_allowed_blocks:
        # No room for this stash. Drop it by turning the command that would
        # have used it into a "new" command.
        consumer = stash_map[raw_id][2]
        to_convert.append(consumer)
        logger.info("%10d %9s %s", stash_range.size(), "explicit", consumer)
        continue

      ref_counts[digest] = ref_counts.get(digest, 0) + 1
      stashed_blocks = blocks_after
      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

    # Implicit stashes: "move" and "diff" may introduce them in BBOTA v3.
    # Prior to ComputePatches(), they both have the style of "diff".
    if transfer.style == "diff":
      assert transfer.tgt_ranges and transfer.src_ranges
      if transfer.src_ranges.overlaps(transfer.tgt_ranges):
        # The whole source ranges get stashed for an implicit stash.
        with_implicit = stashed_blocks + transfer.src_ranges.size()
        if max_allowed_blocks and with_implicit > max_allowed_blocks:
          to_convert.append(transfer)
          logger.info(
              "%10d %9s %s", transfer.src_ranges.size(), "implicit", transfer)
        else:
          max_stashed_blocks = max(max_stashed_blocks, with_implicit)

    # Turn the collected commands into "new"s.
    for cmd in to_convert:
      # The command stops using its stashes, so their def points go away.
      for raw_id, stash_range in cmd.use_stash:
        definer = stash_map[raw_id][1]
        stash_entry = (raw_id, stash_range)
        assert stash_entry in definer.stash_before
        definer.stash_before.remove(stash_entry)

      # Tally the blocks that violate the space limit; the total is logged
      # at the end.
      new_blocks += cmd.tgt_ranges.size()
      cmd.ConvertToNew()

    # transfer.use_stash may generate "free" commands.
    for _, stash_range in transfer.use_stash:
      digest = self.src.RangeSha1(stash_range)
      assert digest in ref_counts
      ref_counts[digest] -= 1
      if ref_counts[digest] == 0:
        stashed_blocks -= stash_range.size()
        ref_counts.pop(digest)

  num_of_bytes = new_blocks * self.tgt.blocksize
  logger.info(
      " Total %d blocks (%d bytes) are packed as new blocks due to "
      "insufficient cache size. Maximum blocks stashed simultaneously: %d",
      new_blocks, num_of_bytes, max_stashed_blocks)
  return new_blocks, max_stashed_blocks

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

838

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

839

def ComputePatches(self, prefix):
  """Writes '<prefix>.new.dat' and '<prefix>.patch.dat' for the transfers.

  The data of every "new" transfer is appended to the .new.dat file. Every
  "diff" transfer is resolved to its final style: "move" when source and
  target hashes are identical, otherwise "imgdiff" or "bsdiff". The patches
  for the latter two are obtained from ComputePatchesForInputList() and
  written back to back into the .patch.dat file, recording each transfer's
  patch_start and patch_len.

  Args:
    prefix: Path prefix of the two output files.
  """

  def DescribeName(transfer):
    # The target name, annotated with the source name when the two differ.
    if transfer.tgt_name == transfer.src_name:
      return transfer.tgt_name
    return transfer.tgt_name + " (from " + transfer.src_name + ")"

  logger.info("Reticulating splines...")
  diff_queue = []
  patch_num = 0
  blocksize = self.tgt.blocksize

  with open(prefix + ".new.dat", "wb") as new_f:
    for index, xf in enumerate(self.transfers):
      style = xf.style

      if style in ("zero", "new"):
        # Only "new" transfers carry payload data.
        if style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
        tgt_size = xf.tgt_ranges.size() * blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
            xf.style, xf.tgt_name, str(xf.tgt_ranges))
        continue

      if style != "diff":
        assert False, "unknown style " + xf.style
        continue

      # We can't compare src and tgt directly because they may have
      # the same content but be broken up into blocks differently, eg:
      #
      #    ["he", "llo"]  vs  ["h", "ello"]
      #
      # We want those to compare equal, ideally without having to
      # actually concatenate the strings (these may be tens of
      # megabytes). Hence the comparison of the hashes.
      if xf.src_sha1 == xf.tgt_sha1:
        # Identical content needs no patch; the device just copies blocks.
        xf.style = "move"
        xf.patch_info = None
        tgt_size = xf.tgt_ranges.size() * blocksize
        if xf.src_ranges != xf.tgt_ranges:
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,
              100.0, xf.style, DescribeName(xf),
              str(xf.tgt_ranges), str(xf.src_ranges))
        continue

      if xf.patch_info:
        # The patch has been generated already (e.g. during split of large
        # APKs or reduction of stash size).
        imgdiff = xf.patch_info.imgdiff
      else:
        imgdiff = self.CanUseImgdiff(
            xf.tgt_name, xf.tgt_ranges, xf.src_ranges)

      if imgdiff:
        xf.style = "imgdiff"
      else:
        xf.style = "bsdiff"
      diff_queue.append((index, imgdiff, patch_num))
      patch_num += 1

  patches = self.ComputePatchesForInputList(diff_queue, False)

  # Lay the patches out back to back in the patch file.
  offset = 0
  with open(prefix + ".patch.dat", "wb") as patch_fd:
    for index, patch_info, _ in patches:
      xf = self.transfers[index]
      content = patch_info.content
      xf.patch_len = len(content)
      xf.patch_start = offset
      offset += xf.patch_len
      patch_fd.write(content)

      tgt_size = xf.tgt_ranges.size() * blocksize
      logger.info(
          "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
          xf.patch_len * 100.0 / tgt_size, xf.style, DescribeName(xf),
          xf.tgt_ranges, xf.src_ranges)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

913

Tianjie Xu

8a7ed9f

2018-01-23 14:06:11 -0800

[diff] [blame]

914

def AssertSha1Good(self):
  """Check the SHA-1 of the src & tgt blocks in the transfer list.

  Double check the SHA-1 value to avoid the issue in b/71908713, where
  SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
  environment. That specific problem has been fixed by protecting the
  underlying generator function 'SparseImage._GetRangeData()' with lock.

  Raises:
    AssertionError: If the hash recorded in a transfer differs from the one
        recomputed from the image. The message names the transfer and both
        hashes.
  """
  for xf in self.transfers:
    tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
    assert xf.tgt_sha1 == tgt_sha1, (
        "target SHA-1 mismatch in transfer %s: recorded %s, computed %s" % (
            xf, xf.tgt_sha1, tgt_sha1))
    # Only the "diff" transfers have their source hash verified here.
    if xf.style == "diff":
      src_sha1 = self.src.RangeSha1(xf.src_ranges)
      assert xf.src_sha1 == src_sha1, (
          "source SHA-1 mismatch in transfer %s: recorded %s, computed %s" % (
              xf, xf.src_sha1, src_sha1))

928

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

929

def AssertSequenceGood(self):
  """Simulates the transfer sequence and checks that it is consistent.

  Checks that:
    - we never read a block after writing it, and
    - we write every block we care about exactly once.

  Raises:
    AssertionError: If one of the above does not hold. The message names the
        offending block.
  """
  total_blocks = self.tgt.total_blocks

  # Start with no blocks having been touched yet. The initializer has to be
  # a bytes literal: Python 3 rejects a str for typecode "B".
  touched = array.array("B", b"\0" * total_blocks)

  # Imagine processing the transfers in order.
  for xf in self.transfers:
    # Check that the input blocks for this transfer haven't yet been touched.
    # Blocks that come from a stash are not read from the image, so they are
    # excluded from the check.
    x = xf.src_ranges
    for _, sr in xf.use_stash:
      x = x.subtract(sr)

    for s, e in x:
      # Source image could be larger. Don't check the blocks that are in the
      # source image only. Since they are not in 'touched', and won't ever
      # be touched.
      for i in range(s, min(e, total_blocks)):
        assert touched[i] == 0, (
            "block %d is read by %s after being written" % (i, xf))

    # Check that the output blocks for this transfer haven't yet
    # been touched, and touch all the blocks written by this
    # transfer.
    for s, e in xf.tgt_ranges:
      for i in range(s, e):
        assert touched[i] == 0, (
            "block %d is written more than once (by %s)" % (i, xf))
        touched[i] = 1

  # The hash tree blocks count as written, too.
  if self.tgt.hashtree_info:
    for s, e in self.tgt.hashtree_info.hashtree_range:
      for i in range(s, e):
        assert touched[i] == 0, (
            "hash tree block %d is already written" % (i,))
        touched[i] = 1

  # Check that we've written every target block.
  for s, e in self.tgt.care_map:
    for i in range(s, e):
      assert touched[i] == 1, "block %d is never written" % (i,)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

970

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

971

def FindSequenceForTransfers(self):
  """Orders self.transfers so that as few blocks as possible get stashed.

  Any dependency and stash information left on the transfers is discarded
  first, then the dependency graph is rebuilt and sequenced from scratch.
  The aim is to violate as few order dependencies between the transfers as
  possible, since every violation requires stashing blocks during the update.
  """
  # Drop the dependencies and stashes recorded by a previous run.
  for transfer in self.transfers:
    transfer.goes_before, transfer.goes_after = OrderedDict(), OrderedDict()
    transfer.stash_before, transfer.use_stash = [], []

  # Step 1: find the ordering dependencies among the transfers (this is
  # O(n^2) in the number of transfers).
  self.GenerateDigraph()

  # Step 2: heuristically pick a sequence that satisfies as many of the
  # ordering dependencies as possible.
  self.FindVertexSequence()

  # Step 3: fix up the ordering dependencies that the sequence didn't
  # satisfy.
  self.ReverseBackwardEdges()

  # Step 4: look for a better order of the resulting graph.
  self.ImproveVertexSequence()

996

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

997

def ImproveVertexSequence(self):
  """Re-sorts self.transfers topologically, preferring less stashed data.

  At this point our digraph is acyclic; we reversed any edges that were
  backwards in the heuristically-generated sequence. The previously-generated
  order is still acceptable, but we hope to find a better order that needs
  less memory for stashed data. Now we do a topological sort to generate a
  new vertex order, using a greedy algorithm to choose which vertex goes
  next whenever we have a choice.

  On return self.transfers holds the new order, and the 'order' attribute of
  every transfer is its index in that list.
  """
  logger.info("Improving vertex order...")

  # Work on copies of the edge sets, as the algorithm destroys them.
  for transfer in self.transfers:
    transfer.incoming = transfer.goes_after.copy()
    transfer.outgoing = transfer.goes_before.copy()

  # The new vertex order.
  sequence = []

  # 'ready' is the heap of sources in the remaining graph; we always choose
  # the one that leaves the least amount of stashed data after it's
  # executed.
  ready = []
  for candidate in self.transfers:
    if not candidate.incoming:
      ready.append((candidate.NetStashChange(), candidate.order, candidate))
  heapq.heapify(ready)

  while ready:
    current = heapq.heappop(ready)[2]
    sequence.append(current)
    # Removing 'current' from the graph may turn its successors into
    # sources.
    for successor in current.outgoing:
      del successor.incoming[current]
      if successor.incoming:
        continue
      heapq.heappush(
          ready, (successor.NetStashChange(), successor.order, successor))

  # if this fails then our graph had a cycle.
  assert len(sequence) == len(self.transfers)

  self.transfers = sequence
  for position, transfer in enumerate(sequence):
    transfer.order = position

1037

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1038

def ReverseBackwardEdges(self):
  """Flip every dependency edge the current order violates, adding stashes.

  An edge "xf goes before u" is violated when xf is not ordered before u.
  In that case u is told to stash the blocks it overwrites that xf still
  needs to read, xf is told to read them from the stash, and the edge is
  reversed so that u is now required to precede xf.

  Stashed blocks are recorded as (stash_raw_id, range) pairs. Each
  'stash_raw_id' uniquely identifies one pair and is consumed by exactly one
  transfer, so the same range (e.g. RangeSet("1-5")) may appear under
  several ids. In BBOTA v3+, identical blocks will be written to the same
  stash slot in WriteTransfers().
  """
  logger.info("Reversing backward edges...")
  satisfied = 0
  violated = 0
  next_stash_id = 0
  stashed_blocks = 0

  for reader in self.transfers:
    # Iterate over a snapshot of the keys: violated edges are removed from
    # reader.goes_before inside the loop.
    for writer in list(reader.goes_before):
      # 'reader' is supposed to run before 'writer'.
      if reader.order < writer.order:
        satisfied += 1
        continue

      # It does not. Make 'writer' stash the blocks it writes that 'reader'
      # wants to read, then require 'writer' to go first.
      violated += 1

      shared = reader.src_ranges.intersect(writer.tgt_ranges)
      assert shared

      stash_entry = (next_stash_id, shared)
      writer.stash_before.append(stash_entry)
      reader.use_stash.append(stash_entry)
      next_stash_id += 1
      stashed_blocks += shared.size()

      # Reverse the edge direction; the stored value doesn't matter.
      del reader.goes_before[writer]
      del writer.goes_after[reader]
      reader.goes_after[writer] = None
      writer.goes_before[reader] = None

  total = satisfied + violated
  violated_percent = (violated * 100.0 / total) if total else 0.0
  logger.info(
      " %d/%d dependencies (%.2f%%) were violated; %d source blocks "
      "stashed.", violated, total, violated_percent, stashed_blocks)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1088

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1089

def FindVertexSequence(self):
  """Pick a transfer order that keeps backward dependency edges cheap.

  This is based on "A Fast & Effective Heuristic for the Feedback Arc Set
  Problem" by P. Eades, X. Lin, and W.F. Smyth. Picture the digraph's
  vertices laid out on a horizontal line: edges that end up pointing left
  will later be removed to make the graph a DAG. Each edge carries a weight
  (the number of source blocks lost if the edge is removed), and the
  heuristic tries to minimise the total weight of left-pointing edges
  rather than just their count.

  On return self.transfers is rearranged into the chosen sequence and each
  transfer's 'order' attribute holds its position in it.
  """
  logger.info("Finding vertex sequence...")

  # The algorithm destroys the edge maps, so give every vertex private
  # copies, plus a score: outgoing weight minus incoming weight.
  for node in self.transfers:
    node.incoming = node.goes_after.copy()
    node.outgoing = node.goes_before.copy()
    node.score = sum(node.outgoing.values()) - sum(node.incoming.values())

  # OrderedDicts stand in for sets throughout so the result is repeatable
  # instead of depending on the hash values of the transfer objects.
  remaining = OrderedDict.fromkeys(self.transfers)
  left = deque()   # left side of the sequence, built from left to right
  right = deque()  # right side of the sequence, built from right to left

  heap = []
  for node in self.transfers:
    node.heap_item = HeapItem(node)
    heap.append(node.heap_item)
  heapq.heapify(heap)

  sinks = OrderedDict()
  sources = OrderedDict()
  for node in remaining:
    if not node.outgoing:
      sinks[node] = None
    if not node.incoming:
      sources[node] = None

  def rescore(node, delta):
    # The old heap entry is cleared and a fresh one is pushed; the stale
    # entry stays in the heap and is filtered out when popped.
    node.score += delta
    node.heap_item.clear()
    node.heap_item = HeapItem(node)
    heapq.heappush(heap, node.heap_item)

  def unlink_from_predecessors(node, fresh_sinks):
    # Drop every edge pred -> node; record predecessors left without
    # outgoing edges in 'fresh_sinks'.
    for pred in node.incoming:
      rescore(pred, -pred.outgoing.pop(node))
      if not pred.outgoing:
        fresh_sinks[pred] = None

  def unlink_from_successors(node, fresh_sources):
    # Drop every edge node -> succ; record successors left without
    # incoming edges in 'fresh_sources'.
    for succ in node.outgoing:
      rescore(succ, +succ.incoming.pop(node))
      if not succ.incoming:
        fresh_sources[succ] = None

  while remaining:
    # Put all sinks at the end of the sequence.
    while sinks:
      next_sinks = OrderedDict()
      for node in sinks:
        if node in remaining:
          right.appendleft(node)
          del remaining[node]
          unlink_from_predecessors(node, next_sinks)
      sinks = next_sinks

    # Put all the sources at the beginning of the sequence.
    while sources:
      next_sources = OrderedDict()
      for node in sources:
        if node in remaining:
          left.append(node)
          del remaining[node]
          unlink_from_successors(node, next_sources)
      sources = next_sources

    if not remaining:
      break

    # Find the "best" vertex to put next: the one that maximizes the net
    # difference in source blocks saved by pretending it's a source rather
    # than a sink. Popped entries that are falsy or whose item has already
    # left the graph are discarded.
    while True:
      entry = heapq.heappop(heap)
      if entry and entry.item in remaining:
        break
    chosen = entry.item

    left.append(chosen)
    del remaining[chosen]
    unlink_from_successors(chosen, sources)
    unlink_from_predecessors(chosen, sinks)

  # Record the sequence in the 'order' field of each transfer and rearrange
  # self.transfers to match; the scratch edge maps are no longer needed.
  sequence = []
  for position, node in enumerate(itertools.chain(left, right)):
    node.order = position
    sequence.append(node)
    del node.incoming
    del node.outgoing

  self.transfers = sequence

1201

1202

def GenerateDigraph(self):
  """Populate goes_before / goes_after on every transfer.

  If the blocks written by transfer A are read by another transfer B, then
  B needs to go before A. That is recorded as B.goes_before[A] and
  A.goes_after[B], both holding the edge weight: the number of overlapping
  blocks, or 0 when B's source name is "__ZERO".
  """
  logger.info("Generating digraph...")

  # readers_by_block[i] is either
  #   - None, if block i is not used as a source, or
  #   - an ordered set (OrderedDict keys) of the transfers reading block i.
  readers_by_block = []
  for reader in self.transfers:
    for start, end in reader.src_ranges:
      shortfall = end - len(readers_by_block)
      if shortfall > 0:
        readers_by_block.extend([None] * shortfall)
      for block in range(start, end):
        if readers_by_block[block] is None:
          readers_by_block[block] = OrderedDict.fromkeys([reader])
        else:
          readers_by_block[block][reader] = None

  for writer in self.transfers:
    # Ordered set of the transfers that read any block 'writer' writes.
    touched = OrderedDict()
    for start, end in writer.tgt_ranges:
      # Blocks past the end of the table are not read by anyone.
      for block in range(start, min(end, len(readers_by_block))):
        readers = readers_by_block[block]
        if readers is not None:
          for reader in readers:
            touched[reader] = None

    for reader in touched:
      if writer is reader:
        continue

      overlap = writer.tgt_ranges.intersect(reader.src_ranges)
      if not overlap:
        continue

      # The cost of removing source blocks for the __ZERO domain is
      # (nearly) zero.
      weight = 0 if reader.src_name == "__ZERO" else overlap.size()
      reader.goes_before[writer] = weight
      writer.goes_after[reader] = weight

1245

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

1246

def ComputePatchesForInputList(self, diff_queue, compress_target):
  """Returns a list of patch information for the input list of transfers.

  Work items are popped off diff_queue by self.threads worker threads, so
  the list passed in is empty when this method returns. A transfer that
  already carries patch_info is not diffed again.

  Args:
    diff_queue: a list of (transfer order, use_imgdiff, patch_index) tuples
        for transfers with style 'diff'. patch_index is the slot of the
        returned list that receives the result.
    compress_target: If True, compresses the target ranges of each
        transfers; and save the size.

  Returns:
    A list of (transfer order, patch_info, compressed_size) tuples, indexed
    by patch_index. compressed_size is None unless compress_target is set.

  Raises:
    SystemExit: via sys.exit(1), after logging, if any patch could not be
        generated or any target could not be compressed.
  """
  if not diff_queue:
    return []

  if self.threads > 1:
    logger.info("Computing patches (using %d threads)...", self.threads)
  else:
    logger.info("Computing patches...")

  diff_total = len(diff_queue)
  patches = [None] * diff_total
  error_messages = []

  # Using multiprocessing doesn't give additional benefits, due to the
  # pattern of the code. The diffing work is done by subprocess.call, which
  # already runs in a separate process (not affected much by the GIL -
  # Global Interpreter Lock). Using multiprocess also requires either a)
  # writing the diff input files in the main process before forking, or b)
  # reopening the image file (SparseImage) in the worker processes. Doing
  # neither of them further improves the performance.
  lock = threading.Lock()

  def diff_worker():
    while True:
      with lock:
        if not diff_queue:
          return
        xf_index, imgdiff, patch_index = diff_queue.pop()
        xf = self.transfers[xf_index]

      message = []
      compressed_size = None

      patch_info = xf.patch_info
      if not patch_info:
        src_file = common.MakeTempFile(prefix="src-")
        with open(src_file, "wb") as fd:
          self.src.WriteRangeDataToFd(xf.src_ranges, fd)

        tgt_file = common.MakeTempFile(prefix="tgt-")
        with open(tgt_file, "wb") as fd:
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)

        try:
          patch_info = compute_patch(src_file, tgt_file, imgdiff)
        except ValueError as e:
          # str(e) rather than e.message: the 'message' attribute does not
          # exist on Python 3 exceptions, and touching it here would kill
          # the worker thread and lose the error report.
          message.append(
              "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                  "imgdiff" if imgdiff else "bsdiff",
                  xf.tgt_name if xf.tgt_name == xf.src_name else
                  xf.tgt_name + " (from " + xf.src_name + ")",
                  xf.tgt_ranges, xf.src_ranges, str(e)))

      if compress_target:
        tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
        try:
          # Compresses with the default level
          compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
          # The chunks are byte strings, so they must be joined with a
          # bytes separator (a str separator raises TypeError on Python 3).
          compressed_data = (compress_obj.compress(b"".join(tgt_data))
                             + compress_obj.flush())
          compressed_size = len(compressed_data)
        except zlib.error as e:
          message.append(
              "Failed to compress the data in target range {} for {}:\n"
              "{}".format(xf.tgt_ranges, xf.tgt_name, str(e)))

      if message:
        with lock:
          error_messages.extend(message)

      with lock:
        patches[patch_index] = (xf_index, patch_info, compressed_size)

  threads = [threading.Thread(target=diff_worker)
             for _ in range(self.threads)]
  for th in threads:
    th.start()
  while threads:
    threads.pop().join()

  if error_messages:
    logger.error('ERROR:')
    logger.error('\n'.join(error_messages))
    logger.error('\n\n\n')
    sys.exit(1)

  return patches

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

1345

def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
  """Converts the diff transfers to reduce the max simultaneous stash.

  'new' data is compressed with deflate, so a 'diff' transfer can be judged
  by comparing its patch size against the size of its compressed target
  data. Ideally the transfers converted are those with a small size
  increase that use a large number of stashed blocks.

  Args:
    violated_stash_blocks: conversion stops once the converted transfers
        account for at least this many stashed blocks.
  """
  TransferSizeScore = namedtuple("TransferSizeScore",
                                 "xf, used_stash_blocks, score")

  logger.info("Selecting diff commands to convert to new.")
  pending = []
  for candidate in self.transfers:
    # Leave out non-diff transfers and the 'move' ones (identical hashes).
    if candidate.style != "diff" or candidate.src_sha1 == candidate.tgt_sha1:
      continue
    imgdiff_ok = self.CanUseImgdiff(candidate.tgt_name, candidate.tgt_ranges,
                                    candidate.src_ranges)
    pending.append((candidate.order, imgdiff_ok, len(pending)))

  # Compute the patch & compressed size for what is left.
  computed = self.ComputePatchesForInputList(pending, True)

  conversion_candidates = []
  for xf_index, patch_info, compressed_size in computed:
    xf = self.transfers[xf_index]
    if not xf.patch_info:
      xf.patch_info = patch_info

    patch_size = len(xf.patch_info.content)
    size_ratio = patch_size * 100.0 / compressed_size
    if xf.patch_info.imgdiff:
      diff_style = "imgdiff"
    else:
      diff_style = "bsdiff"
    logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
                " compression_size: %d, ratio %.2f%%", xf.tgt_name,
                xf.tgt_ranges.size(), diff_style,
                patch_size, compressed_size, size_ratio)

    stash_total = 0
    for _, stashed in xf.use_stash:
      stash_total += stashed.size()

    # The stash_before lists need no maintenance here because the graph
    # will be regenerated later.
    if patch_size >= compressed_size:
      # The compressed data is smaller or equal: queue the transfer with a
      # negative score so it is converted first.
      score = -1
    elif stash_total > 0:
      # This heuristic represents the size increase in the final package to
      # remove per unit of stashed data.
      score = (compressed_size - patch_size) * 100.0 / stash_total
    else:
      continue
    conversion_candidates.append(TransferSizeScore(xf, stash_total, score))

  # Transfers with lower score (i.e. less expensive to convert) will be
  # converted first.
  conversion_candidates = sorted(conversion_candidates,
                                 key=lambda entry: entry.score)

  # TODO(xunchang), improve the logic to find the transfers to convert, e.g.
  # convert the ones that contribute to the max stash, run ReviseStashSize
  # multiple times etc.
  removed_stashed_blocks = 0
  for entry in conversion_candidates:
    logger.info("Converting %s to new", entry.xf.tgt_name)
    entry.xf.ConvertToNew()
    removed_stashed_blocks += entry.used_stash_blocks
    # Experiments show that we will get a smaller package size if we remove
    # slightly more stashed blocks than the violated stash blocks.
    if removed_stashed_blocks >= violated_stash_blocks:
      break

  logger.info("Removed %d stashed blocks", removed_stashed_blocks)

1415

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1416

def FindTransfers(self):

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1417

"""Parse the file_map to generate all the transfers."""

1418

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1419

def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                         src_ranges, style, by_id):
  """Add one or multiple Transfer()s by splitting large files.

  For BBOTA v3, source blocks have to be stashed for the resumable feature.
  With file sizes growing and the cache partition shrinking, the source
  blocks of a big file can be too large to stash, so such a file is cut
  into pieces of at most max_blocks_per_transfer blocks, each becoming its
  own Transfer() named "<name>-<piece index>".

  The downside is that splitting may increase the package size since the
  split pieces don't align well. According to experiments, 1/8 of the cache
  size as the per-piece limit appears to be optimal. Compared to the fixed
  1024-block limit, it reduces the overall package size by 30% for
  volantis, and 20% for angler and bullhead.
  """

  def emit_piece(index, tgt_piece, src_piece):
    # Register one piece; both names get the same "-<index>" suffix.
    Transfer("%s-%d" % (tgt_name, index), "%s-%d" % (src_name, index),
             tgt_piece, src_piece,
             self.tgt.RangeSha1(tgt_piece), self.src.RangeSha1(src_piece),
             style, by_id)

  piece_index = 0
  # Keep carving off full-size pieces while both sides still exceed the
  # per-transfer limit.
  while True:
    if tgt_ranges.size() <= max_blocks_per_transfer:
      break
    if src_ranges.size() <= max_blocks_per_transfer:
      break

    tgt_head = tgt_ranges.first(max_blocks_per_transfer)
    src_head = src_ranges.first(max_blocks_per_transfer)
    emit_piece(piece_index, tgt_head, src_head)

    tgt_ranges = tgt_ranges.subtract(tgt_head)
    src_ranges = src_ranges.subtract(src_head)
    piece_index += 1

  # Handle remaining blocks.
  if tgt_ranges.size() or src_ranges.size():
    # Must be both non-empty.
    assert tgt_ranges.size() and src_ranges.size()
    emit_piece(piece_index, tgt_ranges, src_ranges)

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1460

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1461

def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
                      by_id):
  """Find all the zip files and split the others with a fixed chunk size.

  Files within the per-transfer block limit become a single Transfer().
  Oversized files that imgdiff can handle are queued in large_apks, to be
  split by imgdiff later to reduce the final patch size. Every other
  oversized file is plainly cut into fixed-size chunks, with the potential
  patch size penalty.
  """
  assert style == "diff"

  oversized = (tgt_ranges.size() > max_blocks_per_transfer or
               src_ranges.size() > max_blocks_per_transfer)

  # Change nothing for small files.
  if not oversized:
    Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
             self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
             style, by_id)
    return

  # Split large APKs with imgdiff, if possible. The file type is
  # intentionally checked one more time (CanUseImgdiff() checks that as
  # well), before calling the costly RangeSha1()s.
  if (self.FileTypeSupportedByImgdiff(tgt_name) and
      self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges) and
      self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True)):
    large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
    return

  AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                       src_ranges, style, by_id)

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1492

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1493

def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                split=False):
  """Wrapper function for adding a Transfer().

  Only 'diff' transfers with split=True get special treatment (this covers
  bsdiff/imgdiff/move); anything else is added as a single Transfer() as is.
  """
  if not (style == "diff" and split):
    Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
             self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
             style, by_id)
    return

  # Handle .odex files specially to analyze the block-wise difference. If
  # most of the blocks are identical with only few changes (e.g. header),
  # only the changed blocks are patched, which avoids stashing unchanged
  # blocks while patching. The analysis is limited to files without size
  # changes, to avoid sacrificing the OTA generation cost too much.
  extension = tgt_name.split(".")[-1].lower()
  if extension == 'odex' and tgt_ranges.size() == src_ranges.size():

    # 0.5 threshold can be further tuned. The tradeoff is: if only very
    # few blocks remain identical, we lose the opportunity to use imgdiff
    # that may have better compression ratio than bsdiff.
    crop_threshold = 0.5

    identical_tgt = RangeSet()
    identical_src = RangeSet()
    total_blocks = tgt_ranges.size()
    changed_blocks = 0
    change_limit = total_blocks * crop_threshold

    block_pairs = zip(src_ranges.next_item(), tgt_ranges.next_item())
    for src_block, tgt_block in block_pairs:
      one_src = RangeSet(str(src_block))
      one_tgt = RangeSet(str(tgt_block))
      if self.src.ReadRangeSet(one_src) != self.tgt.ReadRangeSet(one_tgt):
        changed_blocks += one_tgt.size()
      else:
        identical_tgt = identical_tgt.union(one_tgt)
        identical_src = identical_src.union(one_src)

      # Terminate early if no clear sign of benefits.
      if changed_blocks > change_limit:
        break

    if changed_blocks < change_limit:
      assert changed_blocks + identical_tgt.size() == total_blocks
      logger.info(
          '%10d %10d (%6.2f%%) %s', identical_tgt.size(), total_blocks,
          identical_tgt.size() * 100.0 / total_blocks, tgt_name)
      AddSplitTransfers(
          "%s-skipped" % (tgt_name,),
          "%s-skipped" % (src_name,),
          identical_tgt, identical_src, style, by_id)

      # Intentionally change the file extension to avoid being imgdiff'd as
      # the files are no longer in their original format.
      tgt_name = "%s-cropped" % (tgt_name,)
      src_name = "%s-cropped" % (src_name,)
      tgt_ranges = tgt_ranges.subtract(identical_tgt)
      src_ranges = src_ranges.subtract(identical_src)

      # Possibly having no changed blocks.
      if not tgt_ranges:
        return

  # Add the transfer(s).
  AddSplitTransfers(
      tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

1561

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1562

def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
                              split_info):
  """Parse the split_info and return a list of info tuples.

  Args:
    patch_size: total size of the patch file.
    tgt_ranges: Ranges of the target file within the original image.
    src_ranges: Ranges of the source file within the original image.
    split_info: a list of lines in the following format:
      imgdiff version#
      count of pieces
      <patch_size_1> <tgt_size_1> <src_ranges_1>
      ...
      <patch_size_n> <tgt_size_n> <src_ranges_n>

  Returns:
    A list of (patch_start, patch_len, split_tgt_ranges, split_src_ranges)
    tuples, one per piece, in the order listed in split_info.

  Raises:
    AssertionError: if the version is not 2, the piece count or a line is
        malformed, a target size is not a multiple of 4096 bytes, or the
        pieces don't add up to the whole patch / target.
  """

  version = int(split_info[0])
  assert version == 2
  count = int(split_info[1])
  assert len(split_info) - 2 == count

  split_info_list = []
  patch_start = 0
  tgt_remain = copy.deepcopy(tgt_ranges)
  # each line has the format <patch_size>, <tgt_size>, <src_ranges>
  for line in split_info[2:]:
    info = line.split()
    assert len(info) == 3
    patch_length = int(info[0])

    split_tgt_size = int(info[1])
    assert split_tgt_size % 4096 == 0
    # Floor division keeps the block count an int under Python 3 as well;
    # it is exact because the size was just checked to be a multiple of
    # 4096.
    split_tgt_blocks = split_tgt_size // 4096
    assert split_tgt_blocks <= tgt_remain.size()
    split_tgt_ranges = tgt_remain.first(split_tgt_blocks)
    tgt_remain = tgt_remain.subtract(split_tgt_ranges)

    # Find the split_src_ranges within the image file from its relative
    # position in file.
    split_src_indices = RangeSet.parse_raw(info[2])
    split_src_ranges = RangeSet()
    for r in split_src_indices:
      curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
      assert not split_src_ranges.overlaps(curr_range)
      split_src_ranges = split_src_ranges.union(curr_range)

    split_info_list.append((patch_start, patch_length,
                            split_tgt_ranges, split_src_ranges))
    patch_start += patch_length

  # Check that the sizes of all the split pieces add up to the final file
  # size for patch and target.
  assert tgt_remain.size() == 0
  assert patch_start == patch_size
  return split_info_list

1619

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1620

def SplitLargeApks():
  """Split the large apks files.

  Example: Chrome.apk will be split into
    src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
    src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
    ...

  After the split, the target pieces are continuous and block aligned; and
  the source pieces are mutually exclusive. During the split, we also
  generate and save the image patch between src-X & tgt-X. This patch will
  be valid because the block ranges of src-X & tgt-X will always stay the
  same afterwards; but there's a chance we don't use the patch if we
  convert the "diff" command into "new" or "move" later.

  This is a worker body meant to be run by several threads at once. It
  takes no arguments and returns nothing; everything is exchanged through
  names of the enclosing scope:
    large_apks: work queue of (tgt_name, src_name, tgt_ranges, src_ranges)
        tuples; entries are popped while holding transfer_lock.
    split_large_apks: output list; one (split_tgt_name, split_src_name,
        split_tgt_ranges, split_src_ranges, patch_content) tuple is
        appended per split piece.
    max_blocks_per_transfer: forwarded to imgdiff as --block-limit.

  Raises:
    AssertionError: if imgdiff exits with a non-zero return code.
  """

  while True:
    # Only the queue access is serialized; the expensive imgdiff run below
    # happens outside the lock so that workers can proceed in parallel.
    with transfer_lock:
      if not large_apks:
        return
      tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)

    # Dump the source and target block ranges into temp files for imgdiff.
    src_file = common.MakeTempFile(prefix="src-")
    tgt_file = common.MakeTempFile(prefix="tgt-")
    with open(src_file, "wb") as src_fd:
      self.src.WriteRangeDataToFd(src_ranges, src_fd)
    with open(tgt_file, "wb") as tgt_fd:
      self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)

    patch_file = common.MakeTempFile(prefix="patch-")
    patch_info_file = common.MakeTempFile(prefix="split_info-")
    cmd = ["imgdiff", "-z",
           "--block-limit={}".format(max_blocks_per_transfer),
           "--split-info=" + patch_info_file,
           src_file, tgt_file, patch_file]
    proc = common.Run(cmd)
    imgdiff_output, _ = proc.communicate()
    assert proc.returncode == 0, \
        "Failed to create imgdiff patch between {} and {}:\n{}".format(
            src_name, tgt_name, imgdiff_output)

    # The split info is a text file: version, piece count, then one line
    # per piece.
    with open(patch_info_file) as patch_info:
      lines = patch_info.readlines()

    patch_size_total = os.path.getsize(patch_file)
    split_info_list = ParseAndValidateSplitInfo(patch_size_total,
                                                tgt_ranges, src_ranges,
                                                lines)

    # The patch is binary data and patch_start / patch_length are byte
    # offsets, so the file must be opened in binary mode; text mode would
    # decode (and possibly newline-translate) the bytes. Open it once and
    # seek to each piece rather than reopening it per piece.
    with open(patch_file, "rb") as patch_fd:
      for index, (patch_start, patch_length, split_tgt_ranges,
                  split_src_ranges) in enumerate(split_info_list):
        patch_fd.seek(patch_start)
        patch_content = patch_fd.read(patch_length)

        split_src_name = "{}-{}".format(src_name, index)
        split_tgt_name = "{}-{}".format(tgt_name, index)
        split_large_apks.append((split_tgt_name,
                                 split_src_name,
                                 split_tgt_ranges,
                                 split_src_ranges,
                                 patch_content))

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1681

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1682

logger.info("Finding transfers...")

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1683

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1684

large_apks = []

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1685

split_large_apks = []

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1686

cache_size = common.OPTIONS.cache_size

1687

split_threshold = 0.125

1688

max_blocks_per_transfer = int(cache_size * split_threshold /

1689

self.tgt.blocksize)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1690

empty = RangeSet()

Tianjie Xu

20a86cd

2018-01-12 12:21:00 -0800

[diff] [blame]

1691

for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1692

if tgt_fn == "__ZERO":

1693

# the special "__ZERO" domain is all the blocks not contained

1694

# in any file and that are filled with zeros. We have a

1695

# special transfer style for zero blocks.

1696

src_ranges = self.src.file_map.get("__ZERO", empty)

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1697

AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,

1698

"zero", self.transfers)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1699

continue

1700

Tao Bao

ff77781

2015-05-12 11:42:31 -0700

[diff] [blame]

1701

elif tgt_fn == "__COPY":

1702

# "__COPY" domain includes all the blocks not contained in any

1703

# file and that need to be copied unconditionally to the target.

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1704

AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

Tao Bao

ff77781

2015-05-12 11:42:31 -0700

[diff] [blame]

1705

continue

1706

Tianjie Xu

67c7cbb

2018-08-30 00:32:07 -0700

[diff] [blame]

1707

elif tgt_fn == "__HASHTREE":

1708

continue

1709

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1710

elif tgt_fn in self.src.file_map:

1711

# Look for an exact pathname match in the source.

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1712

AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1713

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1714

continue

1715

1716

b = os.path.basename(tgt_fn)

1717

if b in self.src_basenames:

1718

# Look for an exact basename match in the source.

1719

src_fn = self.src_basenames[b]

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1720

AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1721

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1722

continue

1723

1724

b = re.sub("[0-9]+", "#", b)

1725

if b in self.src_numpatterns:

1726

# Look for a 'number pattern' match (a basename match after

1727

# all runs of digits are replaced by "#"). (This is useful

1728

# for .so files that contain version numbers in the filename

1729

# that get bumped.)

1730

src_fn = self.src_numpatterns[b]

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1731

AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1732

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1733

continue

1734

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1735

AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1736

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1737

transfer_lock = threading.Lock()

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1738

threads = [threading.Thread(target=SplitLargeApks)

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1739

for _ in range(self.threads)]

for th in threads:

th.start()

while threads:

threads.pop().join()

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1745

# Sort the split transfers for large apks to generate a determinate package.

1746

split_large_apks.sort()

1747

for (tgt_name, src_name, tgt_ranges, src_ranges,

1748

patch) in split_large_apks:

1749

transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,

1750

self.tgt.RangeSha1(tgt_ranges),

1751

self.src.RangeSha1(src_ranges),

1752

"diff", self.transfers)

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

1753

transfer_split.patch_info = PatchInfo(True, patch)

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1754

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1755

def AbbreviateSourceNames(self):
  """Index the source file paths by basename and by 'number pattern'.

  For every path in self.src.file_map, records:
    self.src_basenames[basename] = path
    self.src_numpatterns[pattern] = path
  where pattern is the basename with each run of digits replaced by "#".
  When several paths share a key, the one iterated last wins.
  """
  for src_path in self.src.file_map:
    base = os.path.basename(src_path)
    self.src_basenames[base] = src_path

    # Collapse digit runs so that names differing only in embedded numbers
    # map to the same key.
    num_pattern = re.sub("[0-9]+", "#", base)
    self.src_numpatterns[num_pattern] = src_path

1761

1762

@staticmethod

1763

def AssertPartition(total, seq):

1764

"""Assert that all the RangeSets in 'seq' form a partition of the

1765

'total' RangeSet (ie, they are nonintersecting and their union

1766

equals 'total')."""

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1767

Doug Zongker