# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import logging
import multiprocessing
import os
import os.path
import re
import sys
import threading
import zlib
from collections import deque, namedtuple, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet

__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])


def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())

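# A minimal usage sketch for compute_patch() (the filenames below are
# hypothetical): the two inputs are temp files holding the source and target
# bytes of one transfer, and the returned PatchInfo.content is what later
# gets appended to the *.patch.dat blob.
#
#   patch_info = compute_patch("/tmp/src-blocks", "/tmp/tgt-blocks",
#                              imgdiff=False)
#   assert not patch_info.imgdiff       # bsdiff-style patch
#   patch_len = len(patch_info.content)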

class Image(object):
  def RangeSha1(self, ranges):
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}
    self.hashtree_info = None

  def RangeSha1(self, ranges):
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")


class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    self.total_blocks = len(self.data) / self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Otherwise the last block may get skipped if it is
    # unchanged for an incremental, but it would then fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)

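# For example (illustrative input, not used by this module): constructing
# DataImage('\0' * 8192) yields total_blocks == 2, an empty clobbered_blocks,
# and file_map == {"__ZERO": <RangeSet covering blocks 0-1>}.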

class FileImage(Image):
  """An image wrapped around a raw image file."""

  def __init__(self, path, hashtree_info_generator=None):
    self.path = path
    self.blocksize = 4096
    self._file_size = os.path.getsize(self.path)
    self._file = open(self.path, 'r')

    if self._file_size % self.blocksize != 0:
      raise ValueError("Size of file %s must be multiple of %d bytes, but is %d"
                       % (self.path, self.blocksize, self._file_size))

    self.total_blocks = self._file_size / self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    self.generator_lock = threading.Lock()

    self.hashtree_info = None
    if hashtree_info_generator:
      self.hashtree_info = hashtree_info_generator.Generate(self)

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks):
      d = self._file.read(self.blocksize)
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks

    self.file_map = {}
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if self.hashtree_info:
      self.file_map["__HASHTREE"] = self.hashtree_info.hashtree_range

  def __del__(self):
    self._file.close()

  def _GetRangeData(self, ranges):
    # Use a lock to protect the generator so that we will not run two
    # instances of this generator on the same object simultaneously.
    with self.generator_lock:
      for s, e in ranges:
        self._file.seek(s * self.blocksize)
        for _ in range(s, e):
          yield self._file.read(self.blocksize)

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    assert not self.clobbered_blocks
    return self.RangeSha1(self.care_map)

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)


class Transfer(object):
  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    return (sum(sr.size() for (_, sr) in self.stash_before) -
            sum(sr.size() for (_, sr) in self.use_stash))

  def ConvertToNew(self):
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
            " to " + str(self.tgt_ranges) + ">")


@functools.total_ordering
class HeapItem(object):
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # The rest of the operations are generated by the functools.total_ordering
  # decorator.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score

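# An illustrative consequence of the score negation in HeapItem (hypothetical
# scores): for transfers scoring 5 and 9, HeapItem stores -5 and -9, so
# heapq.heappop() on a list of HeapItems returns the higher-scoring transfer
# first, i.e. the min-heap behaves as a max-heap.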

class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files to which imgdiff will be applied while
  generating BlockImageDiff. It also logs the ones that cannot use imgdiff,
  with specific reasons. The stats are only meaningful when imgdiff is not
  disabled by the caller of BlockImageDiff. In addition, only files with
  supported types (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed
  to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    if reason not in self.stats:
      self.stats[reason] = set()
    self.stats[reason].add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info(separator * len(header) + '\n')

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      logger.info(''.join([' {}\n'.format(name) for name in values]))

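# A usage sketch for ImgdiffStats (the filenames are illustrative):
#
#   stats = ImgdiffStats()
#   stats.Log("/system/app/Foo.apk", ImgdiffStats.USED_IMGDIFF)
#   stats.Log("/system/app/Bar.apk", ImgdiffStats.SKIPPED_NONMONOTONIC)
#   stats.Report()
#
# BlockImageDiff below does exactly this: Log() is called from
# CanUseImgdiff(), and Report() runs once at the end of Compute().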

class BlockImageDiff(object):
  """Generates the diff of two block image objects.

  BlockImageDiff works on two image objects. An image object is anything that
  provides the following attributes:

     blocksize: the size in bytes of a block, currently must be 4096.

     total_blocks: the total size of the partition/image, in blocks.

     care_map: a RangeSet containing which blocks (in the range
         [0, total_blocks)) we actually care about; i.e. which blocks contain
         data.

     file_map: a dict that partitions the blocks contained in care_map into
         smaller domains that are useful for doing diffs on. (Typically a
         domain is a file, and the key in file_map is the pathname.)

     clobbered_blocks: a RangeSet containing which blocks contain data but may
         be altered by the FS. They need to be excluded when verifying the
         partition integrity.

     ReadRangeSet(): a function that takes a RangeSet and returns the data
         contained in the image blocks of that RangeSet. The data is returned
         as a list or tuple of strings; concatenating the elements together
         should produce the requested data. Implementations are free to break
         up the data into list/tuple elements in any way that is convenient.

     RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
         all the data in the specified range.

     TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
         all the data in the image (i.e., all the blocks in the care_map minus
         clobbered_blocks, or including the clobbered blocks if
         include_clobbered_blocks is True).

  When creating a BlockImageDiff, the src image may be None, in which case the
  list of transfers produced will never read from the original image.
  """

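  # As a concrete reference for the interface above, DataImage and FileImage
  # defined earlier in this module provide exactly these attributes. A minimal
  # hand-rolled image object might look like the (illustrative) sketch below:
  #
  #   class InMemoryImage(object):
  #     def __init__(self, data):
  #       self.blocksize = 4096
  #       self.total_blocks = len(data) // self.blocksize
  #       self.care_map = RangeSet(data=(0, self.total_blocks))
  #       self.clobbered_blocks = RangeSet()
  #       self.extended = RangeSet()
  #       self.hashtree_info = None
  #       self.file_map = {"__NONZERO": self.care_map}
  #       ...  # plus ReadRangeSet/RangeSha1/TotalSha1/WriteRangeDataToFd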
  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff
    self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

  @property
  def max_stashed_size(self):
    return self._max_stashed_size

  @staticmethod
  def FileTypeSupportedByImgdiff(filename):
    """Returns whether the file type is supported by imgdiff."""
    return filename.lower().endswith(('.apk', '.jar', '.zip'))

  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.), we would like to use
    'imgdiff -z' if possible, because it usually produces significantly
    smaller patches than bsdiff.

    This is permissible if all of the following conditions hold.
    - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
    - The file type is supported by imgdiff;
    - The source and target blocks are monotonic (i.e. the data is stored with
      blocks in increasing order);
    - Both files don't contain shared blocks;
    - Both files have complete lists of blocks;
    - We haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the
    file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
    if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
      return False

    if not tgt_ranges.monotonic or not src_ranges.monotonic:
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
      return False

    if (tgt_ranges.extra.get('uses_shared_blocks') or
        src_ranges.extra.get('uses_shared_blocks')):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
      return False

    if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
      return False

    reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
              else ImgdiffStats.USED_IMGDIFF)
    self.imgdiff_stats.Log(name, reason)
    return True

  def Compute(self, prefix):
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      stash_limit = (common.OPTIONS.cache_size *
                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
      # Ignore the stash limit and calculate the maximum simultaneously stashed
      # blocks needed.
      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

      # We cannot stash more blocks than the stash limit simultaneously. As a
      # result, some 'diff' commands will be converted to new; leading to an
      # unintended large package. To mitigate this issue, we can carefully
      # choose the transfers for conversion. The number '1024' can be further
      # tweaked here to balance the package size and build time.
      if max_stashed_blocks > stash_limit + 1024:
        self.SelectAndConvertDiffTransfersToNew(
            max_stashed_blocks - stash_limit)
        # Regenerate the sequence as the graph has changed.
        self.FindSequenceForTransfers()

      # Revise the stash size again to keep the size under limit.
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()

  def WriteTransfers(self, prefix):
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    if self.tgt.hashtree_info:
      out.append("compute_hash_tree {} {} {} {} {}\n".format(
          self.tgt.hashtree_info.hashtree_range.to_string_raw(),
          self.tgt.hashtree_info.filesystem_range.to_string_raw(),
          self.tgt.hashtree_info.hash_algorithm,
          self.tgt.hashtree_info.salt,
          self.tgt.hashtree_info.root_hash))

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starved of clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")
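    # For example, the four header lines inserted above might read (with
    # illustrative numbers):
    #   4        <- transfer list format version
    #   193290   <- total blocks written
    #   0        <- stash slots (unused in v3+)
    #   12       <- maximum number of simultaneously stashed blocks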

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
          max_stashed_blocks, self._max_stashed_size, max_allowed,
          self._max_stashed_size * 100.0 / max_allowed)
    else:
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
          max_stashed_blocks, self._max_stashed_size)

  def ReviseStashSize(self, ignore_stash_limit=False):
    """Revises the transfers to keep the stash size within the size limit.

    Iterates through the transfer list and calculates the stash size each
    transfer generates. Converts the affected transfers to new if we reach the
    stash limit.

    Args:
      ignore_stash_limit: Ignores the stash limit and calculates the max
        simultaneous stashed blocks instead. No change will be made to the
        transfer list with this flag.

    Return:
      A tuple of (tgt blocks converted to new, max stashed blocks)
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    max_allowed_blocks = None
    if not ignore_stash_limit:
      # Compute the maximum blocks available for stash based on /cache size and
      # the threshold.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0
    max_stashed_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than is available, drop that stash
    # by replacing the command that uses it with a "new" command instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after
          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if (max_allowed_blocks and
              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
            replaced_cmds.append(xf)
            logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
          else:
            # The whole source ranges will be stashed for implicit stashes.
            max_stashed_blocks = max(max_stashed_blocks,
                                     stashed_blocks + xf.src_ranges.size())

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up the blocks that violate the space limit and log the total
        # number later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        " Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size. Maximum blocks stashed simultaneously: %d",
        new_blocks, num_of_bytes, max_stashed_blocks)
    return new_blocks, max_stashed_blocks

  def ComputePatches(self, prefix):
    logger.info("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size,
                  tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during split of large
              # APKs or reduction of stash size)
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)

  def AssertSha1Good(self):
    """Check the SHA-1 of the src & tgt blocks in the transfer list.

    Double check the SHA-1 value to avoid the issue in b/71908713, where
    SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
    environment. That specific problem has been fixed by protecting the
    underlying generator function 'SparseImage._GetRangeData()' with lock.
    """
    for xf in self.transfers:
      tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
      assert xf.tgt_sha1 == tgt_sha1
      if xf.style == "diff":
        src_sha1 = self.src.RangeSha1(xf.src_ranges)
        assert xf.src_sha1 == src_sha1

  def AssertSequenceGood(self):
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = array.array("B", "\0" * self.tgt.total_blocks)

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      for _, sr in xf.use_stash:
        x = x.subtract(sr)

      for s, e in x:
        # The source image could be larger. Don't check the blocks that are in
        # the source image only, since they are not in 'touched' and won't
        # ever be touched.
1026 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -08001027 assert touched[i] == 0
1028
1029 # Check that the output blocks for this transfer haven't yet
1030 # been touched, and touch all the blocks written by this
1031 # transfer.
1032 for s, e in xf.tgt_ranges:
1033 for i in range(s, e):
1034 assert touched[i] == 0
1035 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001036
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001037 if self.tgt.hashtree_info:
1038 for s, e in self.tgt.hashtree_info.hashtree_range:
1039 for i in range(s, e):
1040 assert touched[i] == 0
1041 touched[i] = 1
1042
Doug Zongkerfc44a512014-08-26 13:10:25 -07001043 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001044 for s, e in self.tgt.care_map:
1045 for i in range(s, e):
1046 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -07001047
xunchang21531832018-12-06 14:20:05 -08001048 def FindSequenceForTransfers(self):
1049 """Finds a sequence for the given transfers.
1050
1051 The goal is to minimize the violation of order dependencies between these
1052 transfers, so that fewer blocks are stashed when applying the update.
1053 """
1054
1055 # Clear the existing dependency between transfers
1056 for xf in self.transfers:
1057 xf.goes_before = OrderedDict()
1058 xf.goes_after = OrderedDict()
1059
1060 xf.stash_before = []
1061 xf.use_stash = []
1062
1063 # Find the ordering dependencies among transfers (this is O(n^2)
1064 # in the number of transfers).
1065 self.GenerateDigraph()
1066 # Find a sequence of transfers that satisfies as many ordering
1067 # dependencies as possible (heuristically).
1068 self.FindVertexSequence()
1069 # Fix up the ordering dependencies that the sequence didn't
1070 # satisfy.
1071 self.ReverseBackwardEdges()
1072 self.ImproveVertexSequence()
1073
Doug Zongker62338182014-09-08 08:29:55 -07001074 def ImproveVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001075 logger.info("Improving vertex order...")
Doug Zongker62338182014-09-08 08:29:55 -07001076
1077 # At this point our digraph is acyclic; we reversed any edges that
1078 # were backwards in the heuristically-generated sequence. The
1079 # previously-generated order is still acceptable, but we hope to
1080 # find a better order that needs less memory for stashed data.
1081 # Now we do a topological sort to generate a new vertex order,
1082 # using a greedy algorithm to choose which vertex goes next
1083 # whenever we have a choice.
1084
1085 # Make a copy of the edge set; this copy will get destroyed by the
1086 # algorithm.
1087 for xf in self.transfers:
1088 xf.incoming = xf.goes_after.copy()
1089 xf.outgoing = xf.goes_before.copy()
1090
1091 L = [] # the new vertex order
1092
1093 # S is the set of sources in the remaining graph; we always choose
1094 # the one that leaves the least amount of stashed data after it's
1095 # executed.
1096 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
1097 if not u.incoming]
1098 heapq.heapify(S)
1099
1100 while S:
1101 _, _, xf = heapq.heappop(S)
1102 L.append(xf)
1103 for u in xf.outgoing:
1104 del u.incoming[xf]
1105 if not u.incoming:
1106 heapq.heappush(S, (u.NetStashChange(), u.order, u))
1107
1108 # if this fails then our graph had a cycle.
1109 assert len(L) == len(self.transfers)
1110
1111 self.transfers = L
1112 for i, xf in enumerate(L):
1113 xf.order = i
1114
Doug Zongker62338182014-09-08 08:29:55 -07001115 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -08001116 """Reverse unsatisfying edges and compute pairs of stashed blocks.
1117
1118 For each transfer, make sure it properly stashes the blocks it touches and
1119 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
1120 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
1121 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
1122 it is possible to have multiple pairs with different 'stash_raw_id's. Each
1123 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
1124 blocks will be written to the same stash slot in WriteTransfers().
1125 """
1126
Tao Bao32fcdab2018-10-12 10:30:39 -07001127 logger.info("Reversing backward edges...")
Doug Zongker62338182014-09-08 08:29:55 -07001128 in_order = 0
1129 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -08001130 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -07001131 stash_size = 0
1132
1133 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -07001134 for u in xf.goes_before.copy():
1135 # xf should go before u
1136 if xf.order < u.order:
1137 # it does, hurray!
1138 in_order += 1
1139 else:
1140 # it doesn't, boo. modify u to stash the blocks that it
1141 # writes that xf wants to read, and then require u to go
1142 # before xf.
1143 out_of_order += 1
1144
1145 overlap = xf.src_ranges.intersect(u.tgt_ranges)
1146 assert overlap
1147
Tao Bao3a2e3502016-12-28 09:14:39 -08001148 u.stash_before.append((stash_raw_id, overlap))
1149 xf.use_stash.append((stash_raw_id, overlap))
1150 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -07001151 stash_size += overlap.size()
1152
1153 # reverse the edge direction; now xf must go after u
1154 del xf.goes_before[u]
1155 del u.goes_after[xf]
1156 xf.goes_after[u] = None # value doesn't matter
1157 u.goes_before[xf] = None
1158
Tao Bao32fcdab2018-10-12 10:30:39 -07001159 logger.info(
1160 " %d/%d dependencies (%.2f%%) were violated; %d source blocks "
1161 "stashed.", out_of_order, in_order + out_of_order,
1162 (out_of_order * 100.0 / (in_order + out_of_order)) if (
1163 in_order + out_of_order) else 0.0,
1164 stash_size)
Doug Zongker62338182014-09-08 08:29:55 -07001165
Doug Zongkerfc44a512014-08-26 13:10:25 -07001166 def FindVertexSequence(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001167 logger.info("Finding vertex sequence...")
Doug Zongkerfc44a512014-08-26 13:10:25 -07001168
1169 # This is based on "A Fast & Effective Heuristic for the Feedback
1170 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1171 # it as starting with the digraph G and moving all the vertices to
1172 # be on a horizontal line in some order, trying to minimize the
1173 # number of edges that end up pointing to the left. Left-pointing
1174 # edges will get removed to turn the digraph into a DAG. In this
1175 # case each edge has a weight which is the number of source blocks
1176 # we'll lose if that edge is removed; we try to minimize the total
1177 # weight rather than just the number of edges.
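# Sketch of the greedy construction below: repeatedly peel off sinks (pushed
# onto the right-hand deque s2) and sources (appended to the left-hand deque
# s1); when neither exists, take the highest-scoring remaining vertex and
# treat it as a source. The result is s1 followed by s2, and whatever
# left-pointing edges remain are the violations ReverseBackwardEdges() will
# patch up with stashes.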
1178
1179 # Make a copy of the edge set; this copy will get destroyed by the
1180 # algorithm.
1181 for xf in self.transfers:
1182 xf.incoming = xf.goes_after.copy()
1183 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001184 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001185
1186 # We use an OrderedDict instead of just a set so that the output
1187 # is repeatable; otherwise it would depend on the hash values of
1188 # the transfer objects.
1189 G = OrderedDict()
1190 for xf in self.transfers:
1191 G[xf] = None
1192 s1 = deque() # the left side of the sequence, built from left to right
1193 s2 = deque() # the right side of the sequence, built from right to left
1194
Doug Zongker6ab2a502016-02-09 08:28:09 -08001195 heap = []
1196 for xf in self.transfers:
1197 xf.heap_item = HeapItem(xf)
1198 heap.append(xf.heap_item)
1199 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001200
Tao Bao33482282016-10-24 16:49:08 -07001201 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1202 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1203 # { key1: None, key2: None, ... }.
1204 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1205 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001206
1207 def adjust_score(iu, delta):
1208 iu.score += delta
1209 iu.heap_item.clear()
1210 iu.heap_item = HeapItem(iu)
1211 heapq.heappush(heap, iu.heap_item)
1212
1213 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001214 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001215 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001216 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001217 for u in sinks:
Tao Bao508b0872018-02-09 13:44:43 -08001218 if u not in G:
1219 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001220 s2.appendleft(u)
1221 del G[u]
1222 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001223 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001224 if not iu.outgoing:
1225 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001226 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001227
1228 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001229 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001230 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001231 for u in sources:
Tao Bao508b0872018-02-09 13:44:43 -08001232 if u not in G:
1233 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001234 s1.append(u)
1235 del G[u]
1236 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001237 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001238 if not iu.incoming:
1239 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001240 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001241
Tao Bao508b0872018-02-09 13:44:43 -08001242 if not G:
1243 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001244
1245 # Find the "best" vertex to put next. "Best" is the one that
1246 # maximizes the net difference in source blocks saved that we get by
1247 # pretending it's a source rather than a sink.
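# (That net difference is xf.score, kept current by adjust_score(); the heap
# is assumed to surface the highest-scoring live vertex, and stale or removed
# entries are skipped by the checks below.)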
1248
Doug Zongker6ab2a502016-02-09 08:28:09 -08001249 while True:
1250 u = heapq.heappop(heap)
1251 if u and u.item in G:
1252 u = u.item
1253 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001254
Doug Zongkerfc44a512014-08-26 13:10:25 -07001255 s1.append(u)
1256 del G[u]
1257 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001258 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001259 if not iu.incoming:
1260 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001261
Doug Zongkerfc44a512014-08-26 13:10:25 -07001262 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001263 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001264 if not iu.outgoing:
1265 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001266
1267 # Now record the sequence in the 'order' field of each transfer,
1268 # and by rearranging self.transfers to be in the chosen sequence.
1269
1270 new_transfers = []
1271 for x in itertools.chain(s1, s2):
1272 x.order = len(new_transfers)
1273 new_transfers.append(x)
1274 del x.incoming
1275 del x.outgoing
1276
1277 self.transfers = new_transfers
1278
1279 def GenerateDigraph(self):
Tao Bao32fcdab2018-10-12 10:30:39 -07001280 logger.info("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001281
1282 # Each item of source_ranges will be:
1283 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001284 # - an ordered set of transfers.
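# For example (hypothetical transfers), if block 7 is read by b1 and b2,
# source_ranges[7] would be OrderedDict([(b1, None), (b2, None)]).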
Doug Zongker6ab2a502016-02-09 08:28:09 -08001285 source_ranges = []
1286 for b in self.transfers:
1287 for s, e in b.src_ranges:
1288 if e > len(source_ranges):
1289 source_ranges.extend([None] * (e-len(source_ranges)))
1290 for i in range(s, e):
1291 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001292 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001293 else:
Tao Bao33482282016-10-24 16:49:08 -07001294 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001295
Doug Zongkerfc44a512014-08-26 13:10:25 -07001296 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001297 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001298 for s, e in a.tgt_ranges:
1299 for i in range(s, e):
Tao Bao508b0872018-02-09 13:44:43 -08001300 if i >= len(source_ranges):
1301 break
Tao Bao33482282016-10-24 16:49:08 -07001302 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1303 if source_ranges[i] is not None:
1304 for j in source_ranges[i]:
1305 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001306
1307 for b in intersections:
Tao Bao508b0872018-02-09 13:44:43 -08001308 if a is b:
1309 continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001310
1311 # If the blocks written by A are read by B, then B needs to go before A.
1312 i = a.tgt_ranges.intersect(b.src_ranges)
1313 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001314 if b.src_name == "__ZERO":
1315 # the cost of removing source blocks for the __ZERO domain
1316 # is (nearly) zero.
1317 size = 0
1318 else:
1319 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001320 b.goes_before[a] = size
1321 a.goes_after[b] = size
1322
xunchang21531832018-12-06 14:20:05 -08001323 def ComputePatchesForInputList(self, diff_queue, compress_target):
1324 """Returns a list of patch information for the input list of transfers.
1325
1326 Args:
1327 diff_queue: a list of (transfer order, imgdiff flag, patch index) tuples for transfers with style 'diff'.
1328 compress_target: If True, compresses the target ranges of each
1329 transfer and saves the compressed size.
1330
1331 Returns:
1332 A list of (transfer order, patch_info, compressed_size) tuples.
1333 """
1334
1335 if not diff_queue:
1336 return []
1337
1338 if self.threads > 1:
1339 logger.info("Computing patches (using %d threads)...", self.threads)
1340 else:
1341 logger.info("Computing patches...")
1342
1343 diff_total = len(diff_queue)
1344 patches = [None] * diff_total
1345 error_messages = []
1346
1347 # Using multiprocessing doesn't give additional benefits, due to the
1348 # pattern of the code. The diffing work is done by subprocess.call, which
1349 # already runs in a separate process (not affected much by the GIL -
1350 # Global Interpreter Lock). Using multiprocessing also requires either a)
1351 # writing the diff input files in the main process before forking, or b)
1352 # reopening the image file (SparseImage) in the worker processes. Doing
1353 # neither of them further improves the performance.
1354 lock = threading.Lock()
1355
1356 def diff_worker():
1357 while True:
1358 with lock:
1359 if not diff_queue:
1360 return
1361 xf_index, imgdiff, patch_index = diff_queue.pop()
1362 xf = self.transfers[xf_index]
1363
1364 message = []
1365 compressed_size = None
1366
1367 patch_info = xf.patch_info
1368 if not patch_info:
1369 src_file = common.MakeTempFile(prefix="src-")
1370 with open(src_file, "wb") as fd:
1371 self.src.WriteRangeDataToFd(xf.src_ranges, fd)
1372
1373 tgt_file = common.MakeTempFile(prefix="tgt-")
1374 with open(tgt_file, "wb") as fd:
1375 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)
1376
1377 try:
1378 patch_info = compute_patch(src_file, tgt_file, imgdiff)
1379 except ValueError as e:
1380 message.append(
1381 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
1382 "imgdiff" if imgdiff else "bsdiff",
1383 xf.tgt_name if xf.tgt_name == xf.src_name else
1384 xf.tgt_name + " (from " + xf.src_name + ")",
1385 xf.tgt_ranges, xf.src_ranges, e.message))
1386
1387 if compress_target:
1388 tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
1389 try:
1390 # Compresses with the default level
1391 compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
1392 compressed_data = (compress_obj.compress("".join(tgt_data))
1393 + compress_obj.flush())
1394 compressed_size = len(compressed_data)
1395 except zlib.error as e:
1396 message.append(
1397 "Failed to compress the data in target range {} for {}:\n"
1398 "{}".format(xf.tgt_ranges, xf.tgt_name, e.message))
1399
1400 if message:
1401 with lock:
1402 error_messages.extend(message)
1403
1404 with lock:
1405 patches[patch_index] = (xf_index, patch_info, compressed_size)
1406
1407 threads = [threading.Thread(target=diff_worker)
1408 for _ in range(self.threads)]
1409 for th in threads:
1410 th.start()
1411 while threads:
1412 threads.pop().join()
1413
1414 if error_messages:
1415 logger.error('ERROR:')
1416 logger.error('\n'.join(error_messages))
1417 logger.error('\n\n\n')
1418 sys.exit(1)
1419
1420 return patches
1421
xunchangb6105dc2018-12-06 16:39:46 -08001422 def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
xunchang3df4d5e2018-12-06 15:03:45 -08001423 """Converts the diff transfers to reduce the max simultaneous stash.
1424
1425 Since the 'new' data is compressed with deflate, we can select the 'diff'
1426 transfers for conversion by comparing each patch size with the size of the
1427 compressed target data. Ideally, we want to convert transfers that cause only
1428 a small size increase but free a large number of stashed blocks.
1429 """
xunchangb6105dc2018-12-06 16:39:46 -08001430 TransferSizeScore = namedtuple("TransferSizeScore",
1431 "xf, used_stash_blocks, score")
xunchang3df4d5e2018-12-06 15:03:45 -08001432
1433 logger.info("Selecting diff commands to convert to new.")
1434 diff_queue = []
1435 for xf in self.transfers:
1436 if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
1437 use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
1438 xf.src_ranges)
1439 diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))
1440
1441 # Remove the 'move' transfers, and compute the patch & compressed size
1442 # for the remaining.
1443 result = self.ComputePatchesForInputList(diff_queue, True)
1444
xunchangb6105dc2018-12-06 16:39:46 -08001445 conversion_candidates = []
xunchang3df4d5e2018-12-06 15:03:45 -08001446 for xf_index, patch_info, compressed_size in result:
1447 xf = self.transfers[xf_index]
1448 if not xf.patch_info:
1449 xf.patch_info = patch_info
1450
1451 size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
1452 diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
xunchangb6105dc2018-12-06 16:39:46 -08001453 logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
xunchang3df4d5e2018-12-06 15:03:45 -08001454 " compression_size: %d, ratio %.2f%%", xf.tgt_name,
1455 xf.tgt_ranges.size(), diff_style,
1456 len(xf.patch_info.content), compressed_size, size_ratio)
1457
xunchangb6105dc2018-12-06 16:39:46 -08001458 used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
xunchang3df4d5e2018-12-06 15:03:45 -08001459 # Convert the transfer to new if the compressed size is smaller or equal.
1460 # We don't need to maintain the stash_before lists here because the
1461 # graph will be regenerated later.
1462 if len(xf.patch_info.content) >= compressed_size:
xunchangb6105dc2018-12-06 16:39:46 -08001463 # Add the transfer to the candidate list with a negative score, so it
1464 # sorts first and gets converted before the others.
1465 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1466 -1))
1467 elif used_stash_blocks > 0:
1468 # This heuristic estimates the size increase in the final package per
1469 # stashed block removed.
1470 score = ((compressed_size - len(xf.patch_info.content)) * 100.0
1471 / used_stash_blocks)
1472 conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
1473 score))
1474 # Transfers with lower score (i.e. less expensive to convert) will be
1475 # converted first.
1476 conversion_candidates.sort(key=lambda x: x.score)
xunchang3df4d5e2018-12-06 15:03:45 -08001477
xunchangb6105dc2018-12-06 16:39:46 -08001478 # TODO(xunchang), improve the logic to find the transfers to convert, e.g.
1479 # convert the ones that contribute to the max stash, run ReviseStashSize
1480 # multiple times etc.
1481 removed_stashed_blocks = 0
1482 for xf, used_stash_blocks, _ in conversion_candidates:
1483 logger.info("Converting %s to new", xf.tgt_name)
1484 xf.ConvertToNew()
1485 removed_stashed_blocks += used_stash_blocks
1486 # Experiments show that we will get a smaller package size if we remove
1487 # slightly more stashed blocks than the violated stash blocks.
1488 if removed_stashed_blocks >= violated_stash_blocks:
1489 break
xunchang3df4d5e2018-12-06 15:03:45 -08001490
1491 logger.info("Removed %d stashed blocks", removed_stashed_blocks)
1492
Doug Zongkerfc44a512014-08-26 13:10:25 -07001493 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001494 """Parse the file_map to generate all the transfers."""
1495
Tianjie Xu41390d42017-11-22 11:35:18 -08001496 def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1497 src_ranges, style, by_id):
1498 """Add one or multiple Transfer()s by splitting large files.
1499
1500 For BBOTA v3, we need to stash source blocks for the resumable feature.
1501 However, with the growth of file sizes and the shrinking of the cache
1502 partition, source blocks may become too large to be stashed. If a file occupies
1503 too many blocks, we split it into smaller pieces by getting multiple
1504 Transfer()s.
1505
1506 The downside is that after splitting, we may increase the package size
1507 since the split pieces don't align well. According to our experiments,
1508 1/8 of the cache size as the per-piece limit appears to be optimal.
1509 Compared to the fixed 1024-block limit, it reduces the overall package
1510 size by 30% for volantis, and 20% for angler and bullhead."""
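# For example (hypothetical numbers): with a 64 MiB cache and 4 KiB blocks,
# max_blocks_per_transfer = 64 MiB * 0.125 / 4 KiB = 2048 blocks, so a
# 100 MiB file would be emitted as 13 Transfer() pieces.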
1511
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001512 pieces = 0
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001513 while (tgt_ranges.size() > max_blocks_per_transfer and
1514 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001515 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1516 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001517 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1518 src_first = src_ranges.first(max_blocks_per_transfer)
1519
Tao Bao183e56e2017-03-05 17:05:09 -08001520 Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
1521 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
1522 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001523
1524 tgt_ranges = tgt_ranges.subtract(tgt_first)
1525 src_ranges = src_ranges.subtract(src_first)
1526 pieces += 1
1527
1528 # Handle remaining blocks.
1529 if tgt_ranges.size() or src_ranges.size():
1530 # Must be both non-empty.
1531 assert tgt_ranges.size() and src_ranges.size()
1532 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1533 src_split_name = "%s-%d" % (src_name, pieces)
Tao Bao183e56e2017-03-05 17:05:09 -08001534 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
1535 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1536 style, by_id)
Tao Bao9a5caf22015-08-25 15:10:10 -07001537
Tianjie Xu41390d42017-11-22 11:35:18 -08001538 def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
1539 by_id):
1540 """Find all the zip files and split the others with a fixed chunk size.
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001541
Tianjie Xu41390d42017-11-22 11:35:18 -08001542 This function will construct a list of zip archives, which will later be
1543 split by imgdiff to reduce the final patch size. For the other files,
1544 we simply split them based on a fixed chunk size, at the cost of a potential
1545 patch size penalty.
1546 """
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001547
1548 assert style == "diff"
1549
1550 # Change nothing for small files.
1551 if (tgt_ranges.size() <= max_blocks_per_transfer and
1552 src_ranges.size() <= max_blocks_per_transfer):
1553 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1554 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1555 style, by_id)
1556 return
1557
Tao Baocb73aed2018-01-31 17:32:40 -08001558 # Split large APKs with imgdiff, if possible. We're intentionally checking
1559 # file types one more time (CanUseImgdiff() checks that as well), before
1560 # calling the costly RangeSha1()s.
1561 if (self.FileTypeSupportedByImgdiff(tgt_name) and
1562 self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
Tao Bao294651d2018-02-08 23:21:52 -08001563 if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001564 large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
1565 return
1566
Tianjie Xu41390d42017-11-22 11:35:18 -08001567 AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
1568 src_ranges, style, by_id)
Tianjie Xuf68b50f2017-11-21 19:38:03 -08001569
Tao Bao08c85832016-09-19 22:26:30 -07001570 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1571 split=False):
1572 """Wrapper function for adding a Transfer()."""
1573
1574 # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
1575 # otherwise add the Transfer() as is.
1576 if style != "diff" or not split:
Tao Bao183e56e2017-03-05 17:05:09 -08001577 Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1578 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
1579 style, by_id)
Tao Bao08c85832016-09-19 22:26:30 -07001580 return
1581
1582 # Handle .odex files specially to analyze the block-wise difference. If
1583 # most of the blocks are identical with only few changes (e.g. header),
1584 # we will patch the changed blocks only. This avoids stashing unchanged
1585 # blocks while patching. We limit the analysis to files without size
1586 changes only. This is to avoid increasing the OTA generation cost too
1587 much.
1588 if (tgt_name.split(".")[-1].lower() == 'odex' and
1589 tgt_ranges.size() == src_ranges.size()):
1590
1591 # 0.5 threshold can be further tuned. The tradeoff is: if only very
1592 # few blocks remain identical, we lose the opportunity to use imgdiff
1593 # that may have better compression ratio than bsdiff.
1594 crop_threshold = 0.5
1595
1596 tgt_skipped = RangeSet()
1597 src_skipped = RangeSet()
1598 tgt_size = tgt_ranges.size()
1599 tgt_changed = 0
1600 for src_block, tgt_block in zip(src_ranges.next_item(),
1601 tgt_ranges.next_item()):
1602 src_rs = RangeSet(str(src_block))
1603 tgt_rs = RangeSet(str(tgt_block))
1604 if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
1605 tgt_skipped = tgt_skipped.union(tgt_rs)
1606 src_skipped = src_skipped.union(src_rs)
1607 else:
1608 tgt_changed += tgt_rs.size()
1609
1610 # Terminate early if no clear sign of benefits.
1611 if tgt_changed > tgt_size * crop_threshold:
1612 break
1613
1614 if tgt_changed < tgt_size * crop_threshold:
1615 assert tgt_changed + tgt_skipped.size() == tgt_size
Tao Bao32fcdab2018-10-12 10:30:39 -07001616 logger.info(
1617 '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
1618 tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
Tianjie Xu41390d42017-11-22 11:35:18 -08001619 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001620 "%s-skipped" % (tgt_name,),
1621 "%s-skipped" % (src_name,),
1622 tgt_skipped, src_skipped, style, by_id)
1623
1624 # Intentionally change the file extension to avoid being imgdiff'd as
1625 # the files are no longer in their original format.
1626 tgt_name = "%s-cropped" % (tgt_name,)
1627 src_name = "%s-cropped" % (src_name,)
1628 tgt_ranges = tgt_ranges.subtract(tgt_skipped)
1629 src_ranges = src_ranges.subtract(src_skipped)
1630
1631 # Possibly having no changed blocks.
1632 if not tgt_ranges:
1633 return
1634
1635 # Add the transfer(s).
Tianjie Xu41390d42017-11-22 11:35:18 -08001636 AddSplitTransfers(
Tao Bao08c85832016-09-19 22:26:30 -07001637 tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1638
Tianjie Xu25366072017-09-08 17:19:02 -07001639 def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
1640 split_info):
1641 """Parse the split_info and return a list of info tuples.
1642
1643 Args:
1644 patch_size: total size of the patch file.
1645 tgt_ranges: Ranges of the target file within the original image.
1646 src_ranges: Ranges of the source file within the original image.
1647 split_info format:
1648 imgdiff version#
1649 count of pieces
1650 <patch_size_1> <tgt_size_1> <src_ranges_1>
1651 ...
1652 <patch_size_n> <tgt_size_n> <src_ranges_n>
1653
1654 Returns:
1655 A list of (patch_start, patch_length, split_tgt_ranges, split_src_ranges) tuples.
1656 """
1657
1658 version = int(split_info[0])
1659 assert version == 2
1660 count = int(split_info[1])
1661 assert len(split_info) - 2 == count
1662
1663 split_info_list = []
1664 patch_start = 0
1665 tgt_remain = copy.deepcopy(tgt_ranges)
1666 # each line has the format <patch_size>, <tgt_size>, <src_ranges>
1667 for line in split_info[2:]:
1668 info = line.split()
1669 assert len(info) == 3
1670 patch_length = int(info[0])
1671
1672 split_tgt_size = int(info[1])
1673 assert split_tgt_size % 4096 == 0
1674 assert split_tgt_size / 4096 <= tgt_remain.size()
1675 split_tgt_ranges = tgt_remain.first(split_tgt_size / 4096)
1676 tgt_remain = tgt_remain.subtract(split_tgt_ranges)
1677
1678 # Find the split_src_ranges within the image file from its relative
1679 # position in file.
1680 split_src_indices = RangeSet.parse_raw(info[2])
1681 split_src_ranges = RangeSet()
1682 for r in split_src_indices:
1683 curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
1684 assert not split_src_ranges.overlaps(curr_range)
1685 split_src_ranges = split_src_ranges.union(curr_range)
1686
1687 split_info_list.append((patch_start, patch_length,
1688 split_tgt_ranges, split_src_ranges))
1689 patch_start += patch_length
1690
1691 # Check that the sizes of all the split pieces add up to the final file
1692 # size for patch and target.
1693 assert tgt_remain.size() == 0
1694 assert patch_start == patch_size
1695 return split_info_list
1696
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001697 def SplitLargeApks():
1698 """Split the large apks files.
Tianjie Xu25366072017-09-08 17:19:02 -07001699
1700 Example: Chrome.apk will be split into
1701 src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
1702 src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
1703 ...
1704
1705 After the split, the target pieces are continuous and block aligned; and
1706 the source pieces are mutually exclusive. During the split, we also
1707 generate and save the image patch between src-X & tgt-X. This patch will
1708 be valid because the block ranges of src-X & tgt-X will always stay the
1709 same afterwards; but there's a chance we don't use the patch if we
1710 convert the "diff" command into "new" or "move" later.
1711 """
1712
1713 while True:
1714 with transfer_lock:
1715 if not large_apks:
1716 return
1717 tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)
1718
1719 src_file = common.MakeTempFile(prefix="src-")
1720 tgt_file = common.MakeTempFile(prefix="tgt-")
Tianjie Xudf1166e2018-01-27 17:35:41 -08001721 with open(src_file, "wb") as src_fd:
1722 self.src.WriteRangeDataToFd(src_ranges, src_fd)
1723 with open(tgt_file, "wb") as tgt_fd:
1724 self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)
Tianjie Xu25366072017-09-08 17:19:02 -07001725
1726 patch_file = common.MakeTempFile(prefix="patch-")
1727 patch_info_file = common.MakeTempFile(prefix="split_info-")
1728 cmd = ["imgdiff", "-z",
1729 "--block-limit={}".format(max_blocks_per_transfer),
1730 "--split-info=" + patch_info_file,
1731 src_file, tgt_file, patch_file]
Tao Bao73dd4f42018-10-04 16:25:33 -07001732 proc = common.Run(cmd)
1733 imgdiff_output, _ = proc.communicate()
1734 assert proc.returncode == 0, \
Tao Bao4ccea852018-02-06 15:16:41 -08001735 "Failed to create imgdiff patch between {} and {}:\n{}".format(
1736 src_name, tgt_name, imgdiff_output)
Tianjie Xu25366072017-09-08 17:19:02 -07001737
1738 with open(patch_info_file) as patch_info:
1739 lines = patch_info.readlines()
1740
1741 patch_size_total = os.path.getsize(patch_file)
1742 split_info_list = ParseAndValidateSplitInfo(patch_size_total,
1743 tgt_ranges, src_ranges,
1744 lines)
1745 for index, (patch_start, patch_length, split_tgt_ranges,
Tao Bao508b0872018-02-09 13:44:43 -08001746 split_src_ranges) in enumerate(split_info_list):
Tianjie Xu25366072017-09-08 17:19:02 -07001747 with open(patch_file) as f:
1748 f.seek(patch_start)
1749 patch_content = f.read(patch_length)
1750
1751 split_src_name = "{}-{}".format(src_name, index)
1752 split_tgt_name = "{}-{}".format(tgt_name, index)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001753 split_large_apks.append((split_tgt_name,
1754 split_src_name,
1755 split_tgt_ranges,
1756 split_src_ranges,
1757 patch_content))
Tianjie Xu25366072017-09-08 17:19:02 -07001758
Tao Bao32fcdab2018-10-12 10:30:39 -07001759 logger.info("Finding transfers...")
Tao Bao08c85832016-09-19 22:26:30 -07001760
Tianjie Xu25366072017-09-08 17:19:02 -07001761 large_apks = []
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001762 split_large_apks = []
Tianjie Xu25366072017-09-08 17:19:02 -07001763 cache_size = common.OPTIONS.cache_size
1764 split_threshold = 0.125
1765 max_blocks_per_transfer = int(cache_size * split_threshold /
1766 self.tgt.blocksize)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001767 empty = RangeSet()
Tianjie Xu20a86cd2018-01-12 12:21:00 -08001768 for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001769 if tgt_fn == "__ZERO":
1770 # the special "__ZERO" domain is all the blocks not contained
1771 # in any file and that are filled with zeros. We have a
1772 # special transfer style for zero blocks.
1773 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001774 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1775 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001776 continue
1777
Tao Baoff777812015-05-12 11:42:31 -07001778 elif tgt_fn == "__COPY":
1779 # "__COPY" domain includes all the blocks not contained in any
1780 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001781 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001782 continue
1783
Tianjie Xu67c7cbb2018-08-30 00:32:07 -07001784 elif tgt_fn == "__HASHTREE":
1785 continue
1786
Doug Zongkerfc44a512014-08-26 13:10:25 -07001787 elif tgt_fn in self.src.file_map:
1788 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001789 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001790 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001791 continue
1792
1793 b = os.path.basename(tgt_fn)
1794 if b in self.src_basenames:
1795 # Look for an exact basename match in the source.
1796 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001797 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001798 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001799 continue
1800
1801 b = re.sub("[0-9]+", "#", b)
1802 if b in self.src_numpatterns:
1803 # Look for a 'number pattern' match (a basename match after
1804 # all runs of digits are replaced by "#"). (This is useful
1805 # for .so files that contain version numbers in the filename
1806 # that get bumped.)
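# For example, "libfoo-1.2.so" and "libfoo-1.3.so" both reduce to the
# pattern "libfoo-#.#.so", so the renamed source file can still be found
# and diffed against.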
1807 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001808 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
Tao Bao8fad03e2017-03-01 14:36:26 -08001809 "diff", self.transfers, True)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001810 continue
1811
Tao Bao9a5caf22015-08-25 15:10:10 -07001812 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001813
Tianjie Xu25366072017-09-08 17:19:02 -07001814 transfer_lock = threading.Lock()
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001815 threads = [threading.Thread(target=SplitLargeApks)
Tianjie Xu25366072017-09-08 17:19:02 -07001816 for _ in range(self.threads)]
1817 for th in threads:
1818 th.start()
1819 while threads:
1820 threads.pop().join()
1821
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001822 # Sort the split transfers for large apks to generate a deterministic package.
1823 split_large_apks.sort()
1824 for (tgt_name, src_name, tgt_ranges, src_ranges,
1825 patch) in split_large_apks:
1826 transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
1827 self.tgt.RangeSha1(tgt_ranges),
1828 self.src.RangeSha1(src_ranges),
1829 "diff", self.transfers)
xunchang21531832018-12-06 14:20:05 -08001830 transfer_split.patch_info = PatchInfo(True, patch)
Tianjie Xud3bf67e2018-01-10 10:55:19 -08001831
Doug Zongkerfc44a512014-08-26 13:10:25 -07001832 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001833 for k in self.src.file_map.keys():
1834 b = os.path.basename(k)
1835 self.src_basenames[b] = k
1836 b = re.sub("[0-9]+", "#", b)
1837 self.src_numpatterns[b] = k
1838
1839 @staticmethod
1840 def AssertPartition(total, seq):
1841 """Assert that all the RangeSets in 'seq' form a partition of the
1842 'total' RangeSet (ie, they are nonintersecting and their union
1843 equals 'total')."""
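# For example, AssertPartition(RangeSet("0-9"),
# [RangeSet("0-3"), RangeSet("4-9")]) passes, while overlapping or
# incomplete pieces would trip one of the asserts below.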
Doug Zongker6ab2a502016-02-09 08:28:09 -08001844
Doug Zongkerfc44a512014-08-26 13:10:25 -07001845 so_far = RangeSet()
1846 for i in seq:
1847 assert not so_far.overlaps(i)
1848 so_far = so_far.union(i)
1849 assert so_far == total