Blame - tools/releasetools/blockimgdiff.py - android_build

2014-08-26 13:10:25 -0700

[diff] [blame]

15

from __future__ import print_function

16

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

17

import array

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

18

import copy

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

19

import functools

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

20

import heapq

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

21

import itertools

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

22

import logging

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

23

import multiprocessing

24

import os

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

25

import os.path

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

26

import re

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

27

import sys

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

28

import threading

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

29

import zlib

30

from collections import deque, namedtuple, OrderedDict

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

31

from hashlib import sha1

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

32

33

import common

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

34

from rangelib import RangeSet

35

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

36

__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

37

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

38

logger = logging.getLogger(__name__)

39

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

40

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.

41

PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])

42

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

43

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

44

def compute_patch(srcfile, tgtfile, imgdiff=False):

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

45

"""Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""

Tianjie Xu

b59c17f

2016-10-28 17:55:53 -0700

[diff] [blame]

46

patchfile = common.MakeTempFile(prefix='patch-')

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

47

Tianjie Xu

b59c17f

2016-10-28 17:55:53 -0700

[diff] [blame]

48

cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']

49

cmd.extend([srcfile, tgtfile, patchfile])

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

50

Tao Bao

3945158

2017-05-04 11:10:47 -0700

[diff] [blame]

51

# Don't dump the bsdiff/imgdiff commands, which are not useful for the case

52

# here, since they contain temp filenames only.

Tao Bao

73dd4f4

2018-10-04 16:25:33 -0700

[diff] [blame]

53

proc = common.Run(cmd, verbose=False)

54

output, _ = proc.communicate()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

55

Tao Bao

73dd4f4

2018-10-04 16:25:33 -0700

[diff] [blame]

56

if proc.returncode != 0:

Tianjie Xu

b59c17f

2016-10-28 17:55:53 -0700

[diff] [blame]

57

raise ValueError(output)

58

59

with open(patchfile, 'rb') as f:

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

60

return PatchInfo(imgdiff, f.read())

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

61

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

62

63

class Image(object):

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

64

def RangeSha1(self, ranges):

65

raise NotImplementedError

66

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

67

def ReadRangeSet(self, ranges):

68

raise NotImplementedError

69

Tao Bao

68658c0

2015-06-01 13:40:49 -0700

[diff] [blame]

70

def TotalSha1(self, include_clobbered_blocks=False):

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

71

raise NotImplementedError

72

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

73

def WriteRangeDataToFd(self, ranges, fd):

74

raise NotImplementedError

75

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

76

77

class EmptyImage(Image):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

78

"""A zero-length image."""

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

79

80

def __init__(self):

81

self.blocksize = 4096

82

self.care_map = RangeSet()

83

self.clobbered_blocks = RangeSet()

84

self.extended = RangeSet()

85

self.total_blocks = 0

86

self.file_map = {}

Yifan Hong

bb2658d

2019-01-25 12:30:58 -0800

[diff] [blame]

87

self.hashtree_info = None

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

88

89

def RangeSha1(self, ranges):

90

return sha1().hexdigest()

91

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

92

def ReadRangeSet(self, ranges):

93

return ()

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

94

Tao Bao

68658c0

2015-06-01 13:40:49 -0700

[diff] [blame]

95

def TotalSha1(self, include_clobbered_blocks=False):

96

# EmptyImage always carries empty clobbered_blocks, so

97

# include_clobbered_blocks can be ignored.

98

assert self.clobbered_blocks.size() == 0

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

99

return sha1().hexdigest()

100

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

101

def WriteRangeDataToFd(self, ranges, fd):

102

raise ValueError("Can't write data from EmptyImage to file")

103

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

104

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

105

class DataImage(Image):

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

106

"""An image wrapped around a single string of data."""

107

108

def __init__(self, data, trim=False, pad=False):

109

self.data = data

110

self.blocksize = 4096

111

112

assert not (trim and pad)

113

114

partial = len(self.data) % self.blocksize

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

115

padded = False

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

116

if partial > 0:

117

if trim:

118

self.data = self.data[:-partial]

119

elif pad:

120

self.data += '\0' * (self.blocksize - partial)

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

121

padded = True

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

122

else:

123

raise ValueError(("data for DataImage must be multiple of %d bytes "

124

"unless trim or pad is specified") %

125

(self.blocksize,))

126

127

assert len(self.data) % self.blocksize == 0

128

129

self.total_blocks = len(self.data) / self.blocksize

130

self.care_map = RangeSet(data=(0, self.total_blocks))

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

131

# When the last block is padded, we always write the whole block even for

132

# incremental OTAs. Because otherwise the last block may get skipped if

133

# unchanged for an incremental, but would fail the post-install

134

# verification if it has non-zero contents in the padding bytes.

135

# Bug: 23828506

136

if padded:

Tao Bao

2015-09-08 13:39:40 -0700

[diff] [blame]

137

clobbered_blocks = [self.total_blocks-1, self.total_blocks]

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

138

else:

Tao Bao

2015-09-08 13:39:40 -0700

[diff] [blame]

139

clobbered_blocks = []

140

self.clobbered_blocks = clobbered_blocks

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

141

self.extended = RangeSet()

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

zero_blocks = []

nonzero_blocks = []

reference = '\0' * self.blocksize

146

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

147

for i in range(self.total_blocks-1 if padded else self.total_blocks):

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

148

d = self.data[i*self.blocksize : (i+1)*self.blocksize]

149

if d == reference:

150

zero_blocks.append(i)

151

zero_blocks.append(i+1)

152

else:

153

nonzero_blocks.append(i)

154

nonzero_blocks.append(i+1)

155

Tao Bao

2015-09-08 13:39:40 -0700

[diff] [blame]

156

assert zero_blocks or nonzero_blocks or clobbered_blocks

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

157

Tao Bao

2015-09-08 13:39:40 -0700

[diff] [blame]

158

self.file_map = dict()

159

if zero_blocks:

160

self.file_map["__ZERO"] = RangeSet(data=zero_blocks)

161

if nonzero_blocks:

162

self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)

163

if clobbered_blocks:

164

self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

165

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

166

def _GetRangeData(self, ranges):

167

for s, e in ranges:

168

yield self.data[s*self.blocksize:e*self.blocksize]

169

170

def RangeSha1(self, ranges):

171

h = sha1()

Tao Bao

76def24

2017-11-21 09:25:31 -0800

[diff] [blame]

172

for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

h.update(data)

return h.hexdigest()

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

176

def ReadRangeSet(self, ranges):

Yifan Hong

6f3eaeb

2019-04-09 16:49:33 -0700

[diff] [blame]

177

return list(self._GetRangeData(ranges))

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

178

Tao Bao

68658c0

2015-06-01 13:40:49 -0700

[diff] [blame]

179

def TotalSha1(self, include_clobbered_blocks=False):

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

180

if not include_clobbered_blocks:

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

181

return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))

Tao Bao

2015-09-05 20:35:32 -0700

[diff] [blame]

182

else:

183

return sha1(self.data).hexdigest()

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

184

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

185

def WriteRangeDataToFd(self, ranges, fd):

Tao Bao

76def24

2017-11-21 09:25:31 -0800

[diff] [blame]

186

for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

187

fd.write(data)

188

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

189

Yifan Hong

8a66a71

2019-04-04 15:37:57 -0700

[diff] [blame]

190

class FileImage(Image):

191

"""An image wrapped around a raw image file."""

192

193

def __init__(self, path, hashtree_info_generator=None):

194

self.path = path

195

self.blocksize = 4096

196

self._file_size = os.path.getsize(self.path)

197

self._file = open(self.path, 'r')

198

199

if self._file_size % self.blocksize != 0:

200

raise ValueError("Size of file %s must be multiple of %d bytes, but is %d"

201

% self.path, self.blocksize, self._file_size)

202

203

self.total_blocks = self._file_size / self.blocksize

204

self.care_map = RangeSet(data=(0, self.total_blocks))

205

self.clobbered_blocks = RangeSet()

206

self.extended = RangeSet()

207

208

self.hashtree_info = None

209

if hashtree_info_generator:

210

self.hashtree_info = hashtree_info_generator.Generate(self)

zero_blocks = []

nonzero_blocks = []

reference = '\0' * self.blocksize

215

216

for i in range(self.total_blocks):

217

d = self._file.read(self.blocksize)

218

if d == reference:

219

zero_blocks.append(i)

220

zero_blocks.append(i+1)

221

else:

222

nonzero_blocks.append(i)

223

nonzero_blocks.append(i+1)

224

225

assert zero_blocks or nonzero_blocks

self.file_map = {}

if zero_blocks:

self.file_map["__ZERO"] = RangeSet(data=zero_blocks)

230

if nonzero_blocks:

231

self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)

232

if self.hashtree_info:

233

self.file_map["__HASHTREE"] = self.hashtree_info.hashtree_range

def __del__(self):

self._file.close()

def _GetRangeData(self, ranges):

239

for s, e in ranges:

240

self._file.seek(s * self.blocksize)

241

for _ in range(s, e):

242

yield self._file.read(self.blocksize)

243

244

def RangeSha1(self, ranges):

245

h = sha1()

246

for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable

h.update(data)

return h.hexdigest()

def ReadRangeSet(self, ranges):

251

return list(self._GetRangeData(ranges))

252

253

def TotalSha1(self, include_clobbered_blocks=False):

254

assert not self.clobbered_blocks

255

return self.RangeSha1(self.care_map)

256

257

def WriteRangeDataToFd(self, ranges, fd):

258

for data in self._GetRangeData(ranges): # pylint: disable=not-an-iterable

fd.write(data)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

262

class Transfer(object):

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

263

def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,

264

src_sha1, style, by_id):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

265

self.tgt_name = tgt_name

266

self.src_name = src_name

267

self.tgt_ranges = tgt_ranges

268

self.src_ranges = src_ranges

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

269

self.tgt_sha1 = tgt_sha1

270

self.src_sha1 = src_sha1

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

271

self.style = style

Tao Bao

b8c8717

2015-03-19 19:42:12 -0700

[diff] [blame]

272

273

# We use OrderedDict rather than dict so that the output is repeatable;

274

# otherwise it would depend on the hash values of the Transfer objects.

275

self.goes_before = OrderedDict()

276

self.goes_after = OrderedDict()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

277

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

278

self.stash_before = []

279

self.use_stash = []

280

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

self.id = len(by_id)

by_id.append(self)

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

284

self._patch_info = None

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

285

286

@property

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

287

def patch_info(self):

288

return self._patch_info

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

289

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

290

@patch_info.setter

291

def patch_info(self, info):

292

if info:

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

293

assert self.style == "diff"

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

294

self._patch_info = info

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

295

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

296

def NetStashChange(self):

297

return (sum(sr.size() for (_, sr) in self.stash_before) -

298

sum(sr.size() for (_, sr) in self.use_stash))

299

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

300

def ConvertToNew(self):

301

assert self.style != "new"

302

self.use_stash = []

303

self.style = "new"

304

self.src_ranges = RangeSet()

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

305

self.patch_info = None

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

306

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

307

def __str__(self):

308

return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +

309

" to " + str(self.tgt_ranges) + ">")

310

311

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

312

@functools.total_ordering

313

class HeapItem(object):

314

def __init__(self, item):

315

self.item = item

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

316

# Negate the score since python's heap is a min-heap and we want the

317

# maximum score.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

318

self.score = -item.score

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

319

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

320

def clear(self):

321

self.item = None

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

322

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

323

def __bool__(self):

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

324

return self.item is not None

325

326

# Python 2 uses __nonzero__, while Python 3 uses __bool__.

327

__nonzero__ = __bool__

328

329

# The rest operations are generated by functools.total_ordering decorator.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

330

def __eq__(self, other):

331

return self.score == other.score

Tao Bao

2017-12-23 11:50:52 -0800

[diff] [blame]

332

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

333

def __le__(self, other):

334

return self.score <= other.score

335

336

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

337

class ImgdiffStats(object):

338

"""A class that collects imgdiff stats.

339

340

It keeps track of the files that will be applied imgdiff while generating

341

BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific

342

reasons. The stats is only meaningful when imgdiff not being disabled by the

343

caller of BlockImageDiff. In addition, only files with supported types

344

(BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

345

"""

346

347

USED_IMGDIFF = "APK files diff'd with imgdiff"

348

USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

349

350

# Reasons for not applying imgdiff on APKs.

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

351

SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

352

SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

353

SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

354

355

# The list of valid reasons, which will also be the dumped order in a report.

356

REASONS = (

357

USED_IMGDIFF,

358

USED_IMGDIFF_LARGE_APK,

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

359

SKIPPED_NONMONOTONIC,

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

360

SKIPPED_SHARED_BLOCKS,

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

361

SKIPPED_INCOMPLETE,

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

)

def __init__(self):

self.stats = {}

def Log(self, filename, reason):

368

"""Logs why imgdiff can or cannot be applied to the given filename.

369

370

Args:

371

filename: The filename string.

372

reason: One of the reason constants listed in REASONS.

373

374

Raises:

375

AssertionError: On unsupported filetypes or invalid reason.

376

"""

377

assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)

378

assert reason in self.REASONS

379

380

if reason not in self.stats:

381

self.stats[reason] = set()

382

self.stats[reason].add(filename)

383

384

def Report(self):

385

"""Prints a report of the collected imgdiff stats."""

386

387

def print_header(header, separator):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

388

logger.info(header)

389

logger.info(separator * len(header) + '\n')

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

390

391

print_header(' Imgdiff Stats Report ', '=')

392

for key in self.REASONS:

393

if key not in self.stats:

394

continue

395

values = self.stats[key]

396

section_header = ' {} (count: {}) '.format(key, len(values))

397

print_header(section_header, '-')

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

398

logger.info(''.join([' {}\n'.format(name) for name in values]))

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

399

400

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

401

class BlockImageDiff(object):

Tao Bao

76def24

2017-11-21 09:25:31 -0800

[diff] [blame]

402

"""Generates the diff of two block image objects.

403

404

BlockImageDiff works on two image objects. An image object is anything that

405

provides the following attributes:

406

407

blocksize: the size in bytes of a block, currently must be 4096.

408

409

total_blocks: the total size of the partition/image, in blocks.

410

411

care_map: a RangeSet containing which blocks (in the range [0,

412

total_blocks) we actually care about; i.e. which blocks contain data.

413

414

file_map: a dict that partitions the blocks contained in care_map into

415

smaller domains that are useful for doing diffs on. (Typically a domain

416

is a file, and the key in file_map is the pathname.)

417

418

clobbered_blocks: a RangeSet containing which blocks contain data but may

419

be altered by the FS. They need to be excluded when verifying the

420

partition integrity.

421

422

ReadRangeSet(): a function that takes a RangeSet and returns the data

423

contained in the image blocks of that RangeSet. The data is returned as

424

a list or tuple of strings; concatenating the elements together should

425

produce the requested data. Implementations are free to break up the

426

data into list/tuple elements in any way that is convenient.

427

428

RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of

429

all the data in the specified range.

430

431

TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of

432

all the data in the image (ie, all the blocks in the care_map minus

433

clobbered_blocks, or including the clobbered blocks if

434

include_clobbered_blocks is True).

435

436

When creating a BlockImageDiff, the src image may be None, in which case the

437

list of transfers produced will never read from the original image.

438

"""

439

Tao Bao

293fd13

2016-06-11 12:19:23 -0700

[diff] [blame]

440

def __init__(self, tgt, src=None, threads=None, version=4,

441

disable_imgdiff=False):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

442

if threads is None:

443

threads = multiprocessing.cpu_count() // 2

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

444

if threads == 0:

445

threads = 1

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

446

self.threads = threads

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

447

self.version = version

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

448

self.transfers = []

449

self.src_basenames = {}

450

self.src_numpatterns = {}

Tao Bao

b4cfca5

2016-02-04 14:26:02 -0800

[diff] [blame]

451

self._max_stashed_size = 0

Tao Bao

d522bdc

2016-04-12 15:53:16 -0700

[diff] [blame]

452

self.touched_src_ranges = RangeSet()

453

self.touched_src_sha1 = None

Tao Bao

293fd13

2016-06-11 12:19:23 -0700

[diff] [blame]

454

self.disable_imgdiff = disable_imgdiff

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

455

self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

456

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

457

assert version in (3, 4)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

self.tgt = tgt

if src is None:

src = EmptyImage()

self.src = src

# The updater code that installs the patch always uses 4k blocks.

465

assert tgt.blocksize == 4096

466

assert src.blocksize == 4096

467

468

# The range sets in each filemap should comprise a partition of

469

# the care map.

470

self.AssertPartition(src.care_map, src.file_map.values())

471

self.AssertPartition(tgt.care_map, tgt.file_map.values())

472

Tao Bao

b4cfca5

2016-02-04 14:26:02 -0800

[diff] [blame]

473

@property

474

def max_stashed_size(self):

475

return self._max_stashed_size

476

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

477

@staticmethod

478

def FileTypeSupportedByImgdiff(filename):

479

"""Returns whether the file type is supported by imgdiff."""

480

return filename.lower().endswith(('.apk', '.jar', '.zip'))

481

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

482

def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

483

"""Checks whether we can apply imgdiff for the given RangeSets.

484

485

For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use

486

'imgdiff -z' if possible. Because it usually produces significantly smaller

487

patches than bsdiff.

488

489

This is permissible if all of the following conditions hold.

490

- The imgdiff hasn't been disabled by the caller (e.g. squashfs);

491

- The file type is supported by imgdiff;

492

- The source and target blocks are monotonic (i.e. the data is stored with

493

blocks in increasing order);

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

494

- Both files don't contain shared blocks;

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

495

- Both files have complete lists of blocks;

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

496

- We haven't removed any blocks from the source set.

497

498

If all these conditions are satisfied, concatenating all the blocks in the

499

RangeSet in order will produce a valid ZIP file (plus possibly extra zeros

500

in the last block). imgdiff is fine with extra zeros at the end of the file.

501

502

Args:

503

name: The filename to be diff'd.

504

tgt_ranges: The target RangeSet.

505

src_ranges: The source RangeSet.

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

506

large_apk: Whether this is to split a large APK.

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

Returns:

A boolean result.

"""

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

511

if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

512

return False

513

514

if not tgt_ranges.monotonic or not src_ranges.monotonic:

515

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)

516

return False

517

Tao Bao

2018-02-07 12:40:00 -0800

[diff] [blame]

518

if (tgt_ranges.extra.get('uses_shared_blocks') or

519

src_ranges.extra.get('uses_shared_blocks')):

520

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)

521

return False

522

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

523

if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):

524

self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)

525

return False

526

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

527

reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk

528

else ImgdiffStats.USED_IMGDIFF)

529

self.imgdiff_stats.Log(name, reason)

530

return True

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

531

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

532

def Compute(self, prefix):

533

# When looking for a source file to use as the diff input for a

534

# target file, we try:

535

# 1) an exact path match if available, otherwise

536

# 2) a exact basename match if available, otherwise

537

# 3) a basename match after all runs of digits are replaced by

538

# "#" if available, otherwise

539

# 4) we have no source for this target.

540

self.AbbreviateSourceNames()

541

self.FindTransfers()

542

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

543

self.FindSequenceForTransfers()

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

544

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

545

# Ensure the runtime stash size is under the limit.

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

546

if common.OPTIONS.cache_size is not None:

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

547

stash_limit = (common.OPTIONS.cache_size *

548

common.OPTIONS.stash_threshold / self.tgt.blocksize)

549

# Ignore the stash limit and calculate the maximum simultaneously stashed

550

# blocks needed.

551

_, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

552

553

# We cannot stash more blocks than the stash limit simultaneously. As a

554

# result, some 'diff' commands will be converted to new; leading to an

555

# unintended large package. To mitigate this issue, we can carefully

556

# choose the transfers for conversion. The number '1024' can be further

557

# tweaked here to balance the package size and build time.

558

if max_stashed_blocks > stash_limit + 1024:

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

559

self.SelectAndConvertDiffTransfersToNew(

560

max_stashed_blocks - stash_limit)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

561

# Regenerate the sequence as the graph has changed.

562

self.FindSequenceForTransfers()

563

564

# Revise the stash size again to keep the size under limit.

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

565

self.ReviseStashSize()

566

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

567

# Double-check our work.

568

self.AssertSequenceGood()

Tianjie Xu

8a7ed9f

2018-01-23 14:06:11 -0800

[diff] [blame]

569

self.AssertSha1Good()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

570

571

self.ComputePatches(prefix)

572

self.WriteTransfers(prefix)

573

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

574

# Report the imgdiff stats.

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

575

if not self.disable_imgdiff:

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

576

self.imgdiff_stats.Report()

577

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

578

def WriteTransfers(self, prefix):

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

579

def WriteSplitTransfers(out, style, target_blocks):

580

"""Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

581

582

This prevents the target size of one command from being too large; and

583

might help to avoid fsync errors on some devices."""

584

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

585

assert style == "new" or style == "zero"

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

586

blocks_limit = 1024

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

587

total = 0

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

588

while target_blocks:

589

blocks_to_write = target_blocks.first(blocks_limit)

590

out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))

591

total += blocks_to_write.size()

592

target_blocks = target_blocks.subtract(blocks_to_write)

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

593

return total

594

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

595

out = []

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

596

total = 0

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

597

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

598

# In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot

599

# id. 'stashes' records the map from 'hash' to the ref count. The stash

600

# will be freed only if the count decrements to zero.

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

601

stashes = {}

602

stashed_blocks = 0

603

max_stashed_blocks = 0

604

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

605

for xf in self.transfers:

606

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

607

for _, sr in xf.stash_before:

608

sh = self.src.RangeSha1(sr)

609

if sh in stashes:

610

stashes[sh] += 1

Sami Tolvanen

2014-12-09 16:40:34 +0000

[diff] [blame]

611

else:

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

612

stashes[sh] = 1

613

stashed_blocks += sr.size()

614

self.touched_src_ranges = self.touched_src_ranges.union(sr)

615

out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

616

617

if stashed_blocks > max_stashed_blocks:

618

max_stashed_blocks = stashed_blocks

619

Jesse Zhao

7b985f6

2015-03-02 16:53:08 -0800

[diff] [blame]

620

free_string = []

caozhiyuan

21b37d8

2015-10-21 15:14:03 +0800

[diff] [blame]

621

free_size = 0

Jesse Zhao

7b985f6

2015-03-02 16:53:08 -0800

[diff] [blame]

622

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

623

# <# blocks> <src ranges>

624

# OR

625

# <# blocks> <src ranges> <src locs> <stash refs...>

626

# OR

627

# <# blocks> - <stash refs...>

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

628

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

629

size = xf.src_ranges.size()

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

630

src_str_buffer = [str(size)]

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

631

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

632

unstashed_src_ranges = xf.src_ranges

633

mapped_stashes = []

634

for _, sr in xf.use_stash:

635

unstashed_src_ranges = unstashed_src_ranges.subtract(sr)

636

sh = self.src.RangeSha1(sr)

637

sr = xf.src_ranges.map_within(sr)

638

mapped_stashes.append(sr)

639

assert sh in stashes

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

640

src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

641

stashes[sh] -= 1

642

if stashes[sh] == 0:

643

free_string.append("free %s\n" % (sh,))

644

free_size += sr.size()

645

stashes.pop(sh)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

646

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

647

if unstashed_src_ranges:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

648

src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

649

if xf.use_stash:

650

mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

651

src_str_buffer.insert(2, mapped_unstashed.to_string_raw())

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

652

mapped_stashes.append(mapped_unstashed)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

653

self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

654

else:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

655

src_str_buffer.insert(1, "-")

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

656

self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

657

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

658

src_str = " ".join(src_str_buffer)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

659

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

660

# version 3+:

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

661

# zero <rangeset>

662

# new <rangeset>

663

# erase <rangeset>

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

664

# bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>

665

# imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>

666

# move hash <tgt rangeset> <src_str>

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

667

668

tgt_size = xf.tgt_ranges.size()

669

670

if xf.style == "new":

671

assert xf.tgt_ranges

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

672

assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

673

total += tgt_size

674

elif xf.style == "move":

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

675

assert xf.tgt_ranges

676

assert xf.src_ranges.size() == tgt_size

677

if xf.src_ranges != xf.tgt_ranges:

Sami Tolvanen

29f529f

2015-04-17 16:28:08 +0100

[diff] [blame]

678

# take into account automatic stashing of overlapping blocks

679

if xf.src_ranges.overlaps(xf.tgt_ranges):

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

680

temp_stash_usage = stashed_blocks + xf.src_ranges.size()

Sami Tolvanen

29f529f

2015-04-17 16:28:08 +0100

[diff] [blame]

681

if temp_stash_usage > max_stashed_blocks:

682

max_stashed_blocks = temp_stash_usage

683

Tao Bao

d522bdc

2016-04-12 15:53:16 -0700

[diff] [blame]

684

self.touched_src_ranges = self.touched_src_ranges.union(

685

xf.src_ranges)

686

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

687

out.append("%s %s %s %s\n" % (

Sami Tolvanen

2014-12-09 16:40:34 +0000

[diff] [blame]

688

xf.style,

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

689

xf.tgt_sha1,

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

690

xf.tgt_ranges.to_string_raw(), src_str))

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

691

total += tgt_size

692

elif xf.style in ("bsdiff", "imgdiff"):

693

assert xf.tgt_ranges

694

assert xf.src_ranges

695

# take into account automatic stashing of overlapping blocks

696

if xf.src_ranges.overlaps(xf.tgt_ranges):

697

temp_stash_usage = stashed_blocks + xf.src_ranges.size()

698

if temp_stash_usage > max_stashed_blocks:

699

max_stashed_blocks = temp_stash_usage

700

701

self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

702

703

out.append("%s %d %d %s %s %s %s\n" % (

704

xf.style,

705

xf.patch_start, xf.patch_len,

706

xf.src_sha1,

707

xf.tgt_sha1,

708

xf.tgt_ranges.to_string_raw(), src_str))

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

709

total += tgt_size

710

elif xf.style == "zero":

711

assert xf.tgt_ranges

712

to_zero = xf.tgt_ranges.subtract(xf.src_ranges)

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

713

assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

714

total += to_zero.size()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

715

else:

Dan Albert

2015-03-23 19:13:21 -0700

[diff] [blame]

716

raise ValueError("unknown transfer style '%s'\n" % xf.style)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

717

Sami Tolvanen

2014-12-09 16:40:34 +0000

[diff] [blame]

718

if free_string:

719

out.append("".join(free_string))

caozhiyuan

21b37d8

2015-10-21 15:14:03 +0800

[diff] [blame]

720

stashed_blocks -= free_size

Sami Tolvanen

2014-12-09 16:40:34 +0000

[diff] [blame]

721

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

722

if common.OPTIONS.cache_size is not None:

Tao Bao

8dcf738

2015-05-21 14:09:49 -0700

[diff] [blame]

723

# Sanity check: abort if we're going to need more stash space than

724

# the allowed size (cache_size * threshold). There are two purposes

725

# of having a threshold here. a) Part of the cache may have been

726

# occupied by some recovery logs. b) It will buy us some time to deal

727

# with the oversize issue.

728

cache_size = common.OPTIONS.cache_size

729

stash_threshold = common.OPTIONS.stash_threshold

730

max_allowed = cache_size * stash_threshold

Tao Bao

e8c68a0

2017-02-26 10:48:11 -0800

[diff] [blame]

731

assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \

Tao Bao

8dcf738

2015-05-21 14:09:49 -0700

[diff] [blame]

732

'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (

733

max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,

734

self.tgt.blocksize, max_allowed, cache_size,

735

stash_threshold)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

736

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

737

self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

Tao Bao

d522bdc

2016-04-12 15:53:16 -0700

[diff] [blame]

738

Tianjie Xu

67c7cbb

2018-08-30 00:32:07 -0700

[diff] [blame]

739

if self.tgt.hashtree_info:

740

out.append("compute_hash_tree {} {} {} {} {}\n".format(

741

self.tgt.hashtree_info.hashtree_range.to_string_raw(),

742

self.tgt.hashtree_info.filesystem_range.to_string_raw(),

743

self.tgt.hashtree_info.hash_algorithm,

744

self.tgt.hashtree_info.salt,

745

self.tgt.hashtree_info.root_hash))

746

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

747

# Zero out extended blocks as a workaround for bug 20881595.

748

if self.tgt.extended:

Tianjie Xu

2016-06-23 16:10:35 -0700

[diff] [blame]

749

assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==

Tianjie Xu

2016-06-21 15:54:09 -0700

[diff] [blame]

750

self.tgt.extended.size())

Tao Bao

b32d56e

2015-09-09 11:55:01 -0700

[diff] [blame]

751

total += self.tgt.extended.size()

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

752

753

# We erase all the blocks on the partition that a) don't contain useful

Tao Bao

66f1fa6

2016-05-03 10:02:01 -0700

[diff] [blame]

754

# data in the new image; b) will not be touched by dm-verity. Out of those

755

# blocks, we erase the ones that won't be used in this update at the

756

# beginning of an update. The rest would be erased at the end. This is to

757

# work around the eMMC issue observed on some devices, which may otherwise

758

# get starving for clean blocks and thus fail the update. (b/28347095)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

759

all_tgt = RangeSet(data=(0, self.tgt.total_blocks))

Tao Bao

2015-07-09 17:37:49 -0700

[diff] [blame]

760

all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)

761

new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

Tao Bao

66f1fa6

2016-05-03 10:02:01 -0700

[diff] [blame]

762

763

erase_first = new_dontcare.subtract(self.touched_src_ranges)

764

if erase_first:

765

out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

766

767

erase_last = new_dontcare.subtract(erase_first)

768

if erase_last:

769

out.append("erase %s\n" % (erase_last.to_string_raw(),))

Doug Zongker

e985f6f

2014-09-09 12:38:47 -0700

[diff] [blame]

770

771

out.insert(0, "%d\n" % (self.version,)) # format version number

Tao Bao

b32d56e

2015-09-09 11:55:01 -0700

[diff] [blame]

772

out.insert(1, "%d\n" % (total,))

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

773

# v3+: the number of stash slots is unused.

774

out.insert(2, "0\n")

775

out.insert(3, str(max_stashed_blocks) + "\n")

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

776

777

with open(prefix + ".transfer.list", "wb") as f:

for i in out:

f.write(i)

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

781

self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize

782

OPTIONS = common.OPTIONS

783

if OPTIONS.cache_size is not None:

784

max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

785

logger.info(

786

"max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",

787

max_stashed_blocks, self._max_stashed_size, max_allowed,

788

self._max_stashed_size * 100.0 / max_allowed)

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

789

else:

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

790

logger.info(

791

"max stashed blocks: %d (%d bytes), limit: <unknown>\n",

792

max_stashed_blocks, self._max_stashed_size)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

793

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

794

def ReviseStashSize(self, ignore_stash_limit=False):

795

""" Revises the transfers to keep the stash size within the size limit.

796

797

Iterates through the transfer list and calculates the stash size each

798

transfer generates. Converts the affected transfers to new if we reach the

stash limit.

Args:

ignore_stash_limit: Ignores the stash limit and calculates the max

803

simultaneous stashed blocks instead. No change will be made to the

804

transfer list with this flag.

805

806

Return:

807

A tuple of (tgt blocks converted to new, max stashed blocks)

808

"""

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

809

logger.info("Revising stash size...")

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

810

stash_map = {}

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

811

812

# Create the map between a stash and its def/use points. For example, for a

Tao Bao

3c5a16d

2017-02-13 11:42:50 -0800

[diff] [blame]

813

# given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

814

for xf in self.transfers:

815

# Command xf defines (stores) all the stashes in stash_before.

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

816

for stash_raw_id, sr in xf.stash_before:

817

stash_map[stash_raw_id] = (sr, xf)

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

818

819

# Record all the stashes command xf uses.

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

820

for stash_raw_id, _ in xf.use_stash:

821

stash_map[stash_raw_id] += (xf,)

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

822

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

823

max_allowed_blocks = None

824

if not ignore_stash_limit:

825

# Compute the maximum blocks available for stash based on /cache size and

826

# the threshold.

827

cache_size = common.OPTIONS.cache_size

828

stash_threshold = common.OPTIONS.stash_threshold

829

max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

830

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

831

# See the comments for 'stashes' in WriteTransfers().

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

832

stashes = {}

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

833

stashed_blocks = 0

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

834

new_blocks = 0

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

835

max_stashed_blocks = 0

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

836

837

# Now go through all the commands. Compute the required stash size on the

838

# fly. If a command requires excess stash than available, it deletes the

839

# stash by replacing the command that uses the stash with a "new" command

840

# instead.

841

for xf in self.transfers:

842

replaced_cmds = []

843

844

# xf.stash_before generates explicit stash commands.

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

845

for stash_raw_id, sr in xf.stash_before:

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

846

# Check the post-command stashed_blocks.

847

stashed_blocks_after = stashed_blocks

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

848

sh = self.src.RangeSha1(sr)

849

if sh not in stashes:

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

850

stashed_blocks_after += sr.size()

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

851

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

852

if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

853

# We cannot stash this one for a later command. Find out the command

854

# that will use this stash and replace the command with "new".

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

855

use_cmd = stash_map[stash_raw_id][2]

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

856

replaced_cmds.append(use_cmd)

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

857

logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

858

else:

Tao Bao

3c5a16d

2017-02-13 11:42:50 -0800

[diff] [blame]

859

# Update the stashes map.

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

860

if sh in stashes:

861

stashes[sh] += 1

Tao Bao

3c5a16d

2017-02-13 11:42:50 -0800

[diff] [blame]

862

else:

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

863

stashes[sh] = 1

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

864

stashed_blocks = stashed_blocks_after

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

865

max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

866

867

# "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to

868

# ComputePatches(), they both have the style of "diff".

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

869

if xf.style == "diff":

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

870

assert xf.tgt_ranges and xf.src_ranges

871

if xf.src_ranges.overlaps(xf.tgt_ranges):

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

872

if (max_allowed_blocks and

873

stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

874

replaced_cmds.append(xf)

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

875

logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

876

else:

877

# The whole source ranges will be stashed for implicit stashes.

878

max_stashed_blocks = max(max_stashed_blocks,

879

stashed_blocks + xf.src_ranges.size())

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

880

881

# Replace the commands in replaced_cmds with "new"s.

882

for cmd in replaced_cmds:

883

# It no longer uses any commands in "use_stash". Remove the def points

884

# for all those stashes.

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

885

for stash_raw_id, sr in cmd.use_stash:

886

def_cmd = stash_map[stash_raw_id][1]

887

assert (stash_raw_id, sr) in def_cmd.stash_before

888

def_cmd.stash_before.remove((stash_raw_id, sr))

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

889

Tianjie Xu

ebe39a0

2016-01-14 14:12:26 -0800

[diff] [blame]

890

# Add up blocks that violates space limit and print total number to

891

# screen later.

892

new_blocks += cmd.tgt_ranges.size()

Tao Bao

2015-08-17 09:45:13 -0700

[diff] [blame]

893

cmd.ConvertToNew()

894

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

895

# xf.use_stash may generate free commands.

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

896

for _, sr in xf.use_stash:

897

sh = self.src.RangeSha1(sr)

898

assert sh in stashes

899

stashes[sh] -= 1

900

if stashes[sh] == 0:

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

901

stashed_blocks -= sr.size()

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

902

stashes.pop(sh)

Tao Bao

2016-12-16 11:13:55 -0800

[diff] [blame]

903

Tianjie Xu

ebe39a0

2016-01-14 14:12:26 -0800

[diff] [blame]

904

num_of_bytes = new_blocks * self.tgt.blocksize

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

905

logger.info(

906

" Total %d blocks (%d bytes) are packed as new blocks due to "

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

907

"insufficient cache size. Maximum blocks stashed simultaneously: %d",

908

new_blocks, num_of_bytes, max_stashed_blocks)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

909

return new_blocks, max_stashed_blocks

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

910

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

911

def ComputePatches(self, prefix):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

912

logger.info("Reticulating splines...")

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

913

diff_queue = []

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

914

patch_num = 0

915

with open(prefix + ".new.dat", "wb") as new_f:

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

916

for index, xf in enumerate(self.transfers):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

917

if xf.style == "zero":

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

918

tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

919

logger.info(

920

"%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,

921

xf.style, xf.tgt_name, str(xf.tgt_ranges))

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

922

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

923

elif xf.style == "new":

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

924

self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

925

tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

926

logger.info(

927

"%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,

928

xf.style, xf.tgt_name, str(xf.tgt_ranges))

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

929

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

930

elif xf.style == "diff":

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

931

# We can't compare src and tgt directly because they may have

932

# the same content but be broken up into blocks differently, eg:

933

#

934

# ["he", "llo"] vs ["h", "ello"]

935

#

936

# We want those to compare equal, ideally without having to

937

# actually concatenate the strings (these may be tens of

938

# megabytes).

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

939

if xf.src_sha1 == xf.tgt_sha1:

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

940

# These are identical; we don't need to generate a patch,

941

# just issue copy commands on the device.

942

xf.style = "move"

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

943

xf.patch_info = None

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

944

tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

945

if xf.src_ranges != xf.tgt_ranges:

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

946

logger.info(

947

"%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size, tgt_size,

948

100.0, xf.style,

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

949

xf.tgt_name if xf.tgt_name == xf.src_name else (

950

xf.tgt_name + " (from " + xf.src_name + ")"),

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

951

str(xf.tgt_ranges), str(xf.src_ranges))

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

952

else:

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

953

if xf.patch_info:

954

# We have already generated the patch (e.g. during split of large

955

# APKs or reduction of stash size)

956

imgdiff = xf.patch_info.imgdiff

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

957

else:

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

958

imgdiff = self.CanUseImgdiff(

959

xf.tgt_name, xf.tgt_ranges, xf.src_ranges)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

960

xf.style = "imgdiff" if imgdiff else "bsdiff"

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

961

diff_queue.append((index, imgdiff, patch_num))

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

patch_num += 1

else:

assert False, "unknown style " + xf.style

966

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

967

patches = self.ComputePatchesForInputList(diff_queue, False)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

968

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

969

offset = 0

970

with open(prefix + ".patch.dat", "wb") as patch_fd:

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

971

for index, patch_info, _ in patches:

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

972

xf = self.transfers[index]

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

973

xf.patch_len = len(patch_info.content)

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

974

xf.patch_start = offset

975

offset += xf.patch_len

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

976

patch_fd.write(patch_info.content)

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

977

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

978

tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize

979

logger.info(

980

"%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,

981

xf.patch_len * 100.0 / tgt_size, xf.style,

982

xf.tgt_name if xf.tgt_name == xf.src_name else (

983

xf.tgt_name + " (from " + xf.src_name + ")"),

984

xf.tgt_ranges, xf.src_ranges)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

985

Tianjie Xu

8a7ed9f

2018-01-23 14:06:11 -0800

[diff] [blame]

986

def AssertSha1Good(self):

987

"""Check the SHA-1 of the src & tgt blocks in the transfer list.

988

989

Double check the SHA-1 value to avoid the issue in b/71908713, where

990

SparseImage.RangeSha1() messed up with the hash calculation in multi-thread

991

environment. That specific problem has been fixed by protecting the

992

underlying generator function 'SparseImage._GetRangeData()' with lock.

993

"""

994

for xf in self.transfers:

995

tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)

996

assert xf.tgt_sha1 == tgt_sha1

997

if xf.style == "diff":

998

src_sha1 = self.src.RangeSha1(xf.src_ranges)

999

assert xf.src_sha1 == src_sha1

1000

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1001

def AssertSequenceGood(self):

1002

# Simulate the sequences of transfers we will output, and check that:

1003

# - we never read a block after writing it, and

1004

# - we write every block we care about exactly once.

1005

1006

# Start with no blocks having been touched yet.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1007

touched = array.array("B", "\0" * self.tgt.total_blocks)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1008

1009

# Imagine processing the transfers in order.

1010

for xf in self.transfers:

1011

# Check that the input blocks for this transfer haven't yet been touched.

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1012

1013

x = xf.src_ranges

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1014

for _, sr in xf.use_stash:

1015

x = x.subtract(sr)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1016

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1017

for s, e in x:

Tao Bao

ff75c23

2016-03-04 15:23:34 -0800

[diff] [blame]

1018

# Source image could be larger. Don't check the blocks that are in the

1019

# source image only. Since they are not in 'touched', and won't ever

1020

# be touched.

1021

for i in range(s, min(e, self.tgt.total_blocks)):

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1022

assert touched[i] == 0

1023

1024

# Check that the output blocks for this transfer haven't yet

1025

# been touched, and touch all the blocks written by this

1026

# transfer.

1027

for s, e in xf.tgt_ranges:

1028

for i in range(s, e):

1029

assert touched[i] == 0

1030

touched[i] = 1

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1031

Tianjie Xu

67c7cbb

2018-08-30 00:32:07 -0700

[diff] [blame]

1032

if self.tgt.hashtree_info:

1033

for s, e in self.tgt.hashtree_info.hashtree_range:

1034

for i in range(s, e):

1035

assert touched[i] == 0

1036

touched[i] = 1

1037

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1038

# Check that we've written every target block.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1039

for s, e in self.tgt.care_map:

1040

for i in range(s, e):

1041

assert touched[i] == 1

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1042

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

1043

def FindSequenceForTransfers(self):

1044

"""Finds a sequence for the given transfers.

1045

1046

The goal is to minimize the violation of order dependencies between these

1047

transfers, so that fewer blocks are stashed when applying the update.

1048

"""

1049

1050

# Clear the existing dependency between transfers

1051

for xf in self.transfers:

1052

xf.goes_before = OrderedDict()

1053

xf.goes_after = OrderedDict()

xf.stash_before = []

xf.use_stash = []

# Find the ordering dependencies among transfers (this is O(n^2)

1059

# in the number of transfers).

1060

self.GenerateDigraph()

1061

# Find a sequence of transfers that satisfies as many ordering

1062

# dependencies as possible (heuristically).

1063

self.FindVertexSequence()

1064

# Fix up the ordering dependencies that the sequence didn't

1065

# satisfy.

1066

self.ReverseBackwardEdges()

1067

self.ImproveVertexSequence()

1068

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1069

def ImproveVertexSequence(self):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1070

logger.info("Improving vertex order...")

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1071

1072

# At this point our digraph is acyclic; we reversed any edges that

1073

# were backwards in the heuristically-generated sequence. The

1074

# previously-generated order is still acceptable, but we hope to

1075

# find a better order that needs less memory for stashed data.

1076

# Now we do a topological sort to generate a new vertex order,

1077

# using a greedy algorithm to choose which vertex goes next

1078

# whenever we have a choice.

1079

1080

# Make a copy of the edge set; this copy will get destroyed by the

1081

# algorithm.

1082

for xf in self.transfers:

1083

xf.incoming = xf.goes_after.copy()

1084

xf.outgoing = xf.goes_before.copy()

1085

1086

L = [] # the new vertex order

1087

1088

# S is the set of sources in the remaining graph; we always choose

1089

# the one that leaves the least amount of stashed data after it's

1090

# executed.

1091

S = [(u.NetStashChange(), u.order, u) for u in self.transfers

if not u.incoming]

heapq.heapify(S)

while S:

_, _, xf = heapq.heappop(S)

1097

L.append(xf)

1098

for u in xf.outgoing:

1099

del u.incoming[xf]

1100

if not u.incoming:

1101

heapq.heappush(S, (u.NetStashChange(), u.order, u))

1102

1103

# if this fails then our graph had a cycle.

1104

assert len(L) == len(self.transfers)

1105

1106

self.transfers = L

1107

for i, xf in enumerate(L):

1108

xf.order = i

1109

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1110

def ReverseBackwardEdges(self):

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

1111

"""Reverse unsatisfying edges and compute pairs of stashed blocks.

1112

1113

For each transfer, make sure it properly stashes the blocks it touches and

1114

will be used by later transfers. It uses pairs of (stash_raw_id, range) to

1115

record the blocks to be stashed. 'stash_raw_id' is an id that uniquely

1116

identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),

1117

it is possible to have multiple pairs with different 'stash_raw_id's. Each

1118

'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical

1119

blocks will be written to the same stash slot in WriteTransfers().

1120

"""

1121

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1122

logger.info("Reversing backward edges...")

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1123

in_order = 0

1124

out_of_order = 0

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

1125

stash_raw_id = 0

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1126

stash_size = 0

1127

1128

for xf in self.transfers:

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1129

for u in xf.goes_before.copy():

1130

# xf should go before u

1131

if xf.order < u.order:

# it does, hurray!

in_order += 1

else:

# it doesn't, boo. modify u to stash the blocks that it

1136

# writes that xf wants to read, and then require u to go

# before xf.

out_of_order += 1

overlap = xf.src_ranges.intersect(u.tgt_ranges)

1141

assert overlap

1142

Tao Bao

2016-12-28 09:14:39 -0800

[diff] [blame]

1143

u.stash_before.append((stash_raw_id, overlap))

1144

xf.use_stash.append((stash_raw_id, overlap))

1145

stash_raw_id += 1

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1146

stash_size += overlap.size()

1147

1148

# reverse the edge direction; now xf must go after u

1149

del xf.goes_before[u]

1150

del u.goes_after[xf]

1151

xf.goes_after[u] = None # value doesn't matter

1152

u.goes_before[xf] = None

1153

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1154

logger.info(

1155

" %d/%d dependencies (%.2f%%) were violated; %d source blocks "

1156

"stashed.", out_of_order, in_order + out_of_order,

1157

(out_of_order * 100.0 / (in_order + out_of_order)) if (

1158

in_order + out_of_order) else 0.0,

1159

stash_size)

Doug Zongker

2014-09-08 08:29:55 -0700

[diff] [blame]

1160

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1161

def FindVertexSequence(self):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1162

logger.info("Finding vertex sequence...")

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1163

1164

# This is based on "A Fast & Effective Heuristic for the Feedback

1165

# Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of

1166

# it as starting with the digraph G and moving all the vertices to

1167

# be on a horizontal line in some order, trying to minimize the

1168

# number of edges that end up pointing to the left. Left-pointing

1169

# edges will get removed to turn the digraph into a DAG. In this

1170

# case each edge has a weight which is the number of source blocks

1171

# we'll lose if that edge is removed; we try to minimize the total

1172

# weight rather than just the number of edges.

1173

1174

# Make a copy of the edge set; this copy will get destroyed by the

1175

# algorithm.

1176

for xf in self.transfers:

1177

xf.incoming = xf.goes_after.copy()

1178

xf.outgoing = xf.goes_before.copy()

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1179

xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1180

1181

# We use an OrderedDict instead of just a set so that the output

1182

# is repeatable; otherwise it would depend on the hash values of

1183

# the transfer objects.

1184

G = OrderedDict()

1185

for xf in self.transfers:

1186

G[xf] = None

1187

s1 = deque() # the left side of the sequence, built from left to right

1188

s2 = deque() # the right side of the sequence, built from right to left

1189

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1190

heap = []

1191

for xf in self.transfers:

1192

xf.heap_item = HeapItem(xf)

1193

heap.append(xf.heap_item)

1194

heapq.heapify(heap)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1195

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1196

# Use OrderedDict() instead of set() to preserve the insertion order. Need

1197

# to use 'sinks[key] = None' to add key into the set. sinks will look like

1198

# { key1: None, key2: None, ... }.

1199

sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)

1200

sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1201

1202

def adjust_score(iu, delta):

1203

iu.score += delta

1204

iu.heap_item.clear()

1205

iu.heap_item = HeapItem(iu)

1206

heapq.heappush(heap, iu.heap_item)

1207

1208

while G:

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1209

# Put all sinks at the end of the sequence.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1210

while sinks:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1211

new_sinks = OrderedDict()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1212

for u in sinks:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1213

if u not in G:

1214

continue

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1215

s2.appendleft(u)

1216

del G[u]

1217

for iu in u.incoming:

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1218

adjust_score(iu, -iu.outgoing.pop(u))

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1219

if not iu.outgoing:

1220

new_sinks[iu] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1221

sinks = new_sinks

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1222

1223

# Put all the sources at the beginning of the sequence.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1224

while sources:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1225

new_sources = OrderedDict()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1226

for u in sources:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1227

if u not in G:

1228

continue

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1229

s1.append(u)

1230

del G[u]

1231

for iu in u.outgoing:

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1232

adjust_score(iu, +iu.incoming.pop(u))

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1233

if not iu.incoming:

1234

new_sources[iu] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1235

sources = new_sources

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1236

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1237

if not G:

1238

break

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1239

1240

# Find the "best" vertex to put next. "Best" is the one that

1241

# maximizes the net difference in source blocks saved we get by

1242

# pretending it's a source rather than a sink.

1243

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1244

while True:

1245

u = heapq.heappop(heap)

1246

if u and u.item in G:

1247

u = u.item

1248

break

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1249

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1250

s1.append(u)

1251

del G[u]

1252

for iu in u.outgoing:

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1253

adjust_score(iu, +iu.incoming.pop(u))

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1254

if not iu.incoming:

1255

sources[iu] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1256

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1257

for iu in u.incoming:

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1258

adjust_score(iu, -iu.outgoing.pop(u))

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1259

if not iu.outgoing:

1260

sinks[iu] = None

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1261

1262

# Now record the sequence in the 'order' field of each transfer,

1263

# and by rearranging self.transfers to be in the chosen sequence.

1264

1265

new_transfers = []

1266

for x in itertools.chain(s1, s2):

1267

x.order = len(new_transfers)

1268

new_transfers.append(x)

del x.incoming

del x.outgoing

self.transfers = new_transfers

1273

1274

def GenerateDigraph(self):

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1275

logger.info("Generating digraph...")

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1276

1277

# Each item of source_ranges will be:

1278

# - None, if that block is not used as a source,

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1279

# - an ordered set of transfers.

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1280

source_ranges = []

1281

for b in self.transfers:

1282

for s, e in b.src_ranges:

1283

if e > len(source_ranges):

1284

source_ranges.extend([None] * (e-len(source_ranges)))

1285

for i in range(s, e):

1286

if source_ranges[i] is None:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1287

source_ranges[i] = OrderedDict.fromkeys([b])

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1288

else:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1289

source_ranges[i][b] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1290

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1291

for a in self.transfers:

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1292

intersections = OrderedDict()

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1293

for s, e in a.tgt_ranges:

1294

for i in range(s, e):

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1295

if i >= len(source_ranges):

1296

break

Tao Bao

2016-10-24 16:49:08 -0700

[diff] [blame]

1297

# Add all the Transfers in source_ranges[i] to the (ordered) set.

1298

if source_ranges[i] is not None:

1299

for j in source_ranges[i]:

1300

intersections[j] = None

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1301

1302

for b in intersections:

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1303

if a is b:

1304

continue

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1305

1306

# If the blocks written by A are read by B, then B needs to go before A.

1307

i = a.tgt_ranges.intersect(b.src_ranges)

1308

if i:

Doug Zongker

2014-08-26 10:40:28 -0700

[diff] [blame]

1309

if b.src_name == "__ZERO":

1310

# the cost of removing source blocks for the __ZERO domain

# is (nearly) zero.

size = 0

else:

size = i.size()

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1315

b.goes_before[a] = size

1316

a.goes_after[b] = size

1317

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

1318

def ComputePatchesForInputList(self, diff_queue, compress_target):

1319

"""Returns a list of patch information for the input list of transfers.

1320

1321

Args:

1322

diff_queue: a list of transfers with style 'diff'

1323

compress_target: If True, compresses the target ranges of each

1324

transfers; and save the size.

1325

1326

Returns:

1327

A list of (transfer order, patch_info, compressed_size) tuples.

"""

if not diff_queue:

return []

if self.threads > 1:

logger.info("Computing patches (using %d threads)...", self.threads)

1335

else:

1336

logger.info("Computing patches...")

1337

1338

diff_total = len(diff_queue)

1339

patches = [None] * diff_total

1340

error_messages = []

1341

1342

# Using multiprocessing doesn't give additional benefits, due to the

1343

# pattern of the code. The diffing work is done by subprocess.call, which

1344

# already runs in a separate process (not affected much by the GIL -

1345

# Global Interpreter Lock). Using multiprocess also requires either a)

1346

# writing the diff input files in the main process before forking, or b)

1347

# reopening the image file (SparseImage) in the worker processes. Doing

1348

# neither of them further improves the performance.

1349

lock = threading.Lock()

def diff_worker():

while True:

with lock:

if not diff_queue:

return

xf_index, imgdiff, patch_index = diff_queue.pop()

1357

xf = self.transfers[xf_index]

1358

1359

message = []

1360

compressed_size = None

1361

1362

patch_info = xf.patch_info

1363

if not patch_info:

1364

src_file = common.MakeTempFile(prefix="src-")

1365

with open(src_file, "wb") as fd:

1366

self.src.WriteRangeDataToFd(xf.src_ranges, fd)

1367

1368

tgt_file = common.MakeTempFile(prefix="tgt-")

1369

with open(tgt_file, "wb") as fd:

1370

self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)

1371

1372

try:

1373

patch_info = compute_patch(src_file, tgt_file, imgdiff)

1374

except ValueError as e:

1375

message.append(

1376

"Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (

1377

"imgdiff" if imgdiff else "bsdiff",

1378

xf.tgt_name if xf.tgt_name == xf.src_name else

1379

xf.tgt_name + " (from " + xf.src_name + ")",

1380

xf.tgt_ranges, xf.src_ranges, e.message))

1381

1382

if compress_target:

1383

tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)

1384

try:

1385

# Compresses with the default level

1386

compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)

1387

compressed_data = (compress_obj.compress("".join(tgt_data))

1388

+ compress_obj.flush())

1389

compressed_size = len(compressed_data)

1390

except zlib.error as e:

1391

message.append(

1392

"Failed to compress the data in target range {} for {}:\n"

1393

"{}".format(xf.tgt_ranges, xf.tgt_name, e.message))

if message:

with lock:

error_messages.extend(message)

1398

1399

with lock:

1400

patches[patch_index] = (xf_index, patch_info, compressed_size)

1401

1402

threads = [threading.Thread(target=diff_worker)

1403

for _ in range(self.threads)]

for th in threads:

th.start()

while threads:

threads.pop().join()

if error_messages:

logger.error('ERROR:')

1411

logger.error('\n'.join(error_messages))

1412

logger.error('\n\n\n')

sys.exit(1)

return patches

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

1417

def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1418

"""Converts the diff transfers to reduce the max simultaneous stash.

1419

1420

Since the 'new' data is compressed with deflate, we can select the 'diff'

1421

transfers for conversion by comparing its patch size with the size of the

1422

compressed data. Ideally, we want to convert the transfers with a small

1423

size increase, but using a large number of stashed blocks.

1424

"""

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

1425

TransferSizeScore = namedtuple("TransferSizeScore",

1426

"xf, used_stash_blocks, score")

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1427

1428

logger.info("Selecting diff commands to convert to new.")

1429

diff_queue = []

1430

for xf in self.transfers:

1431

if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:

1432

use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,

1433

xf.src_ranges)

1434

diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))

1435

1436

# Remove the 'move' transfers, and compute the patch & compressed size

1437

# for the remaining.

1438

result = self.ComputePatchesForInputList(diff_queue, True)

1439

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

1440

conversion_candidates = []

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1441

for xf_index, patch_info, compressed_size in result:

1442

xf = self.transfers[xf_index]

1443

if not xf.patch_info:

1444

xf.patch_info = patch_info

1445

1446

size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size

1447

diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

1448

logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1449

" compression_size: %d, ratio %.2f%%", xf.tgt_name,

1450

xf.tgt_ranges.size(), diff_style,

1451

len(xf.patch_info.content), compressed_size, size_ratio)

1452

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

1453

used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1454

# Convert the transfer to new if the compressed size is smaller or equal.

1455

# We don't need to maintain the stash_before lists here because the

1456

# graph will be regenerated later.

1457

if len(xf.patch_info.content) >= compressed_size:

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

1458

# Add the transfer to the candidate list with negative score. And it

1459

# will be converted later.

1460

conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,

1461

-1))

1462

elif used_stash_blocks > 0:

1463

# This heuristic represents the size increase in the final package to

1464

# remove per unit of stashed data.

1465

score = ((compressed_size - len(xf.patch_info.content)) * 100.0

1466

/ used_stash_blocks)

1467

conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,

1468

score))

1469

# Transfers with lower score (i.e. less expensive to convert) will be

1470

# converted first.

1471

conversion_candidates.sort(key=lambda x: x.score)

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1472

xunchang

2018-12-06 16:39:46 -0800

[diff] [blame]

1473

# TODO(xunchang), improve the logic to find the transfers to convert, e.g.

1474

# convert the ones that contribute to the max stash, run ReviseStashSize

1475

# multiple times etc.

1476

removed_stashed_blocks = 0

1477

for xf, used_stash_blocks, _ in conversion_candidates:

1478

logger.info("Converting %s to new", xf.tgt_name)

1479

xf.ConvertToNew()

1480

removed_stashed_blocks += used_stash_blocks

1481

# Experiments show that we will get a smaller package size if we remove

1482

# slightly more stashed blocks than the violated stash blocks.

1483

if removed_stashed_blocks >= violated_stash_blocks:

1484

break

xunchang

2018-12-06 15:03:45 -0800

[diff] [blame]

1485

1486

logger.info("Removed %d stashed blocks", removed_stashed_blocks)

1487

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1488

def FindTransfers(self):

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1489

"""Parse the file_map to generate all the transfers."""

1490

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1491

def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,

1492

src_ranges, style, by_id):

1493

"""Add one or multiple Transfer()s by splitting large files.

1494

1495

For BBOTA v3, we need to stash source blocks for resumable feature.

1496

However, with the growth of file size and the shrink of the cache

1497

partition source blocks are too large to be stashed. If a file occupies

1498

too many blocks, we split it into smaller pieces by getting multiple

1499

Transfer()s.

1500

1501

The downside is that after splitting, we may increase the package size

1502

since the split pieces don't align well. According to our experiments,

1503

1/8 of the cache size as the per-piece limit appears to be optimal.

1504

Compared to the fixed 1024-block limit, it reduces the overall package

1505

size by 30% for volantis, and 20% for angler and bullhead."""

1506

Tianjie Xu

bb86e1d

2016-01-13 16:14:10 -0800

[diff] [blame]

1507

pieces = 0

Tianjie Xu

bb86e1d

2016-01-13 16:14:10 -0800

[diff] [blame]

1508

while (tgt_ranges.size() > max_blocks_per_transfer and

1509

src_ranges.size() > max_blocks_per_transfer):

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1510

tgt_split_name = "%s-%d" % (tgt_name, pieces)

1511

src_split_name = "%s-%d" % (src_name, pieces)

Tianjie Xu

bb86e1d

2016-01-13 16:14:10 -0800

[diff] [blame]

1512

tgt_first = tgt_ranges.first(max_blocks_per_transfer)

1513

src_first = src_ranges.first(max_blocks_per_transfer)

1514

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

1515

Transfer(tgt_split_name, src_split_name, tgt_first, src_first,

1516

self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),

1517

style, by_id)

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1518

1519

tgt_ranges = tgt_ranges.subtract(tgt_first)

1520

src_ranges = src_ranges.subtract(src_first)

1521

pieces += 1

1522

1523

# Handle remaining blocks.

1524

if tgt_ranges.size() or src_ranges.size():

1525

# Must be both non-empty.

1526

assert tgt_ranges.size() and src_ranges.size()

1527

tgt_split_name = "%s-%d" % (tgt_name, pieces)

1528

src_split_name = "%s-%d" % (src_name, pieces)

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

1529

Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,

1530

self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),

1531

style, by_id)

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1532

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1533

def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,

1534

by_id):

1535

"""Find all the zip files and split the others with a fixed chunk size.

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1536

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1537

This function will construct a list of zip archives, which will later be

1538

split by imgdiff to reduce the final patch size. For the other files,

1539

we will plainly split them based on a fixed chunk size with the potential

1540

patch size penalty.

1541

"""

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1542

1543

assert style == "diff"

1544

1545

# Change nothing for small files.

1546

if (tgt_ranges.size() <= max_blocks_per_transfer and

1547

src_ranges.size() <= max_blocks_per_transfer):

1548

Transfer(tgt_name, src_name, tgt_ranges, src_ranges,

1549

self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),

style, by_id)

return

Tao Bao

2018-01-31 17:32:40 -0800

[diff] [blame]

1553

# Split large APKs with imgdiff, if possible. We're intentionally checking

1554

# file types one more time (CanUseImgdiff() checks that as well), before

1555

# calling the costly RangeSha1()s.

1556

if (self.FileTypeSupportedByImgdiff(tgt_name) and

1557

self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):

Tao Bao

2018-02-08 23:21:52 -0800

[diff] [blame]

1558

if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1559

large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))

1560

return

1561

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1562

AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,

1563

src_ranges, style, by_id)

Tianjie Xu

2017-11-21 19:38:03 -0800

[diff] [blame]

1564

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1565

def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,

1566

split=False):

1567

"""Wrapper function for adding a Transfer()."""

1568

1569

# We specialize diff transfers only (which covers bsdiff/imgdiff/move);

1570

# otherwise add the Transfer() as is.

1571

if style != "diff" or not split:

Tao Bao

2017-03-05 17:05:09 -0800

[diff] [blame]

1572

Transfer(tgt_name, src_name, tgt_ranges, src_ranges,

1573

self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),

1574

style, by_id)

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1575

return

1576

1577

# Handle .odex files specially to analyze the block-wise difference. If

1578

# most of the blocks are identical with only few changes (e.g. header),

1579

# we will patch the changed blocks only. This avoids stashing unchanged

1580

# blocks while patching. We limit the analysis to files without size

1581

# changes only. This is to avoid sacrificing the OTA generation cost too

1582

# much.

1583

if (tgt_name.split(".")[-1].lower() == 'odex' and

1584

tgt_ranges.size() == src_ranges.size()):

1585

1586

# 0.5 threshold can be further tuned. The tradeoff is: if only very

1587

# few blocks remain identical, we lose the opportunity to use imgdiff

1588

# that may have better compression ratio than bsdiff.

1589

crop_threshold = 0.5

1590

1591

tgt_skipped = RangeSet()

1592

src_skipped = RangeSet()

1593

tgt_size = tgt_ranges.size()

1594

tgt_changed = 0

1595

for src_block, tgt_block in zip(src_ranges.next_item(),

1596

tgt_ranges.next_item()):

1597

src_rs = RangeSet(str(src_block))

1598

tgt_rs = RangeSet(str(tgt_block))

1599

if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):

1600

tgt_skipped = tgt_skipped.union(tgt_rs)

1601

src_skipped = src_skipped.union(src_rs)

1602

else:

1603

tgt_changed += tgt_rs.size()

1604

1605

# Terminate early if no clear sign of benefits.

1606

if tgt_changed > tgt_size * crop_threshold:

1607

break

1608

1609

if tgt_changed < tgt_size * crop_threshold:

1610

assert tgt_changed + tgt_skipped.size() == tgt_size

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1611

logger.info(

1612

'%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,

1613

tgt_skipped.size() * 100.0 / tgt_size, tgt_name)

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1614

AddSplitTransfers(

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1615

"%s-skipped" % (tgt_name,),

1616

"%s-skipped" % (src_name,),

1617

tgt_skipped, src_skipped, style, by_id)

1618

1619

# Intentionally change the file extension to avoid being imgdiff'd as

1620

# the files are no longer in their original format.

1621

tgt_name = "%s-cropped" % (tgt_name,)

1622

src_name = "%s-cropped" % (src_name,)

1623

tgt_ranges = tgt_ranges.subtract(tgt_skipped)

1624

src_ranges = src_ranges.subtract(src_skipped)

1625

1626

# Possibly having no changed blocks.

if not tgt_ranges:

return

# Add the transfer(s).

Tianjie Xu

2017-11-22 11:35:18 -0800

[diff] [blame]

1631

AddSplitTransfers(

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1632

tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

1633

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1634

def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,

1635

split_info):

1636

"""Parse the split_info and return a list of info tuples.

1637

1638

Args:

1639

patch_size: total size of the patch file.

1640

tgt_ranges: Ranges of the target file within the original image.

1641

src_ranges: Ranges of the source file within the original image.

split_info format:

imgdiff version#

count of pieces

<patch_size_1> <tgt_size_1> <src_ranges_1>

1646

...

1647

<patch_size_n> <tgt_size_n> <src_ranges_n>

1648

1649

Returns:

1650

[patch_start, patch_len, split_tgt_ranges, split_src_ranges]

1651

"""

1652

1653

version = int(split_info[0])

1654

assert version == 2

1655

count = int(split_info[1])

1656

assert len(split_info) - 2 == count

split_info_list = []

patch_start = 0

tgt_remain = copy.deepcopy(tgt_ranges)

1661

# each line has the format <patch_size>, <tgt_size>, <src_ranges>

1662

for line in split_info[2:]:

1663

info = line.split()

1664

assert len(info) == 3

1665

patch_length = int(info[0])

1666

1667

split_tgt_size = int(info[1])

1668

assert split_tgt_size % 4096 == 0

1669

assert split_tgt_size / 4096 <= tgt_remain.size()

1670

split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)

1671

tgt_remain = tgt_remain.subtract(split_tgt_ranges)

1672

1673

# Find the split_src_ranges within the image file from its relative

1674

# position in file.

1675

split_src_indices = RangeSet.parse_raw(info[2])

1676

split_src_ranges = RangeSet()

1677

for r in split_src_indices:

1678

curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))

1679

assert not split_src_ranges.overlaps(curr_range)

1680

split_src_ranges = split_src_ranges.union(curr_range)

1681

1682

split_info_list.append((patch_start, patch_length,

1683

split_tgt_ranges, split_src_ranges))

1684

patch_start += patch_length

1685

1686

# Check that the sizes of all the split pieces add up to the final file

1687

# size for patch and target.

1688

assert tgt_remain.size() == 0

1689

assert patch_start == patch_size

1690

return split_info_list

1691

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1692

def SplitLargeApks():

1693

"""Split the large apks files.

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1694

1695

Example: Chrome.apk will be split into

1696

src-0: Chrome.apk-0, tgt-0: Chrome.apk-0

1697

src-1: Chrome.apk-1, tgt-1: Chrome.apk-1

1698

...

1699

1700

After the split, the target pieces are continuous and block aligned; and

1701

the source pieces are mutually exclusive. During the split, we also

1702

generate and save the image patch between src-X & tgt-X. This patch will

1703

be valid because the block ranges of src-X & tgt-X will always stay the

1704

same afterwards; but there's a chance we don't use the patch if we

1705

convert the "diff" command into "new" or "move" later.

"""

while True:

with transfer_lock:

if not large_apks:

return

tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)

1713

1714

src_file = common.MakeTempFile(prefix="src-")

1715

tgt_file = common.MakeTempFile(prefix="tgt-")

Tianjie Xu

df1166e

2018-01-27 17:35:41 -0800

[diff] [blame]

1716

with open(src_file, "wb") as src_fd:

1717

self.src.WriteRangeDataToFd(src_ranges, src_fd)

1718

with open(tgt_file, "wb") as tgt_fd:

1719

self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1720

1721

patch_file = common.MakeTempFile(prefix="patch-")

1722

patch_info_file = common.MakeTempFile(prefix="split_info-")

1723

cmd = ["imgdiff", "-z",

1724

"--block-limit={}".format(max_blocks_per_transfer),

1725

"--split-info=" + patch_info_file,

1726

src_file, tgt_file, patch_file]

Tao Bao

73dd4f4

2018-10-04 16:25:33 -0700

[diff] [blame]

1727

proc = common.Run(cmd)

1728

imgdiff_output, _ = proc.communicate()

1729

assert proc.returncode == 0, \

Tao Bao

2018-02-06 15:16:41 -0800

[diff] [blame]

1730

"Failed to create imgdiff patch between {} and {}:\n{}".format(

1731

src_name, tgt_name, imgdiff_output)

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1732

1733

with open(patch_info_file) as patch_info:

1734

lines = patch_info.readlines()

1735

1736

patch_size_total = os.path.getsize(patch_file)

1737

split_info_list = ParseAndValidateSplitInfo(patch_size_total,

1738

tgt_ranges, src_ranges,

1739

lines)

1740

for index, (patch_start, patch_length, split_tgt_ranges,

Tao Bao

2018-02-09 13:44:43 -0800

[diff] [blame]

1741

split_src_ranges) in enumerate(split_info_list):

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1742

with open(patch_file) as f:

1743

f.seek(patch_start)

1744

patch_content = f.read(patch_length)

1745

1746

split_src_name = "{}-{}".format(src_name, index)

1747

split_tgt_name = "{}-{}".format(tgt_name, index)

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1748

split_large_apks.append((split_tgt_name,

split_src_name,

split_tgt_ranges,

split_src_ranges,

patch_content))

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1753

Tao Bao

2018-10-12 10:30:39 -0700

[diff] [blame]

1754

logger.info("Finding transfers...")

Tao Bao

2016-09-19 22:26:30 -0700

[diff] [blame]

1755

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1756

large_apks = []

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1757

split_large_apks = []

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1758

cache_size = common.OPTIONS.cache_size

1759

split_threshold = 0.125

1760

max_blocks_per_transfer = int(cache_size * split_threshold /

1761

self.tgt.blocksize)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1762

empty = RangeSet()

Tianjie Xu

20a86cd

2018-01-12 12:21:00 -0800

[diff] [blame]

1763

for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1764

if tgt_fn == "__ZERO":

1765

# the special "__ZERO" domain is all the blocks not contained

1766

# in any file and that are filled with zeros. We have a

1767

# special transfer style for zero blocks.

1768

src_ranges = self.src.file_map.get("__ZERO", empty)

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1769

AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,

1770

"zero", self.transfers)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1771

continue

1772

Tao Bao

ff77781

2015-05-12 11:42:31 -0700

[diff] [blame]

1773

elif tgt_fn == "__COPY":

1774

# "__COPY" domain includes all the blocks not contained in any

1775

# file and that need to be copied unconditionally to the target.

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1776

AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

Tao Bao

ff77781

2015-05-12 11:42:31 -0700

[diff] [blame]

1777

continue

1778

Tianjie Xu

67c7cbb

2018-08-30 00:32:07 -0700

[diff] [blame]

1779

elif tgt_fn == "__HASHTREE":

1780

continue

1781

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1782

elif tgt_fn in self.src.file_map:

1783

# Look for an exact pathname match in the source.

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1784

AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1785

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1786

continue

1787

1788

b = os.path.basename(tgt_fn)

1789

if b in self.src_basenames:

1790

# Look for an exact basename match in the source.

1791

src_fn = self.src_basenames[b]

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1792

AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1793

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1794

continue

1795

1796

b = re.sub("[0-9]+", "#", b)

1797

if b in self.src_numpatterns:

1798

# Look for a 'number pattern' match (a basename match after

1799

# all runs of digits are replaced by "#"). (This is useful

1800

# for .so files that contain version numbers in the filename

1801

# that get bumped.)

1802

src_fn = self.src_numpatterns[b]

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1803

AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],

Tao Bao

2017-03-01 14:36:26 -0800

[diff] [blame]

1804

"diff", self.transfers, True)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1805

continue

1806

Tao Bao

2015-08-25 15:10:10 -0700

[diff] [blame]

1807

AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1808

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1809

transfer_lock = threading.Lock()

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1810

threads = [threading.Thread(target=SplitLargeApks)

Tianjie Xu

2017-09-08 17:19:02 -0700

[diff] [blame]

1811

for _ in range(self.threads)]

for th in threads:

th.start()

while threads:

threads.pop().join()

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1817

# Sort the split transfers for large apks to generate a determinate package.

1818

split_large_apks.sort()

1819

for (tgt_name, src_name, tgt_ranges, src_ranges,

1820

patch) in split_large_apks:

1821

transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,

1822

self.tgt.RangeSha1(tgt_ranges),

1823

self.src.RangeSha1(src_ranges),

1824

"diff", self.transfers)

xunchang

2018-12-06 14:20:05 -0800

[diff] [blame]

1825

transfer_split.patch_info = PatchInfo(True, patch)

Tianjie Xu

2018-01-10 10:55:19 -0800

[diff] [blame]

1826

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1827

def AbbreviateSourceNames(self):

Doug Zongker

2014-08-26 13:10:25 -0700

[diff] [blame]

1828

for k in self.src.file_map.keys():

1829

b = os.path.basename(k)

1830

self.src_basenames[b] = k

1831

b = re.sub("[0-9]+", "#", b)

1832

self.src_numpatterns[b] = k

1833

1834

@staticmethod

1835

def AssertPartition(total, seq):

1836

"""Assert that all the RangeSets in 'seq' form a partition of the

1837

'total' RangeSet (ie, they are nonintersecting and their union

1838

equals 'total')."""

Doug Zongker

2016-02-09 08:28:09 -0800

[diff] [blame]

1839

Doug Zongker