# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import array
import copy
import functools
import heapq
import itertools
import logging
import multiprocessing
import os
import os.path
import re
import sys
import threading
import zlib
from collections import deque, namedtuple, OrderedDict
from hashlib import sha1

import common
from rangelib import RangeSet

__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]

logger = logging.getLogger(__name__)

# The tuple contains the style and bytes of a bsdiff|imgdiff patch.
PatchInfo = namedtuple("PatchInfo", ["imgdiff", "content"])


def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Calls bsdiff|imgdiff to compute the patch data, returns a PatchInfo."""
  patchfile = common.MakeTempFile(prefix='patch-')

  cmd = ['imgdiff', '-z'] if imgdiff else ['bsdiff']
  cmd.extend([srcfile, tgtfile, patchfile])

  # Don't dump the bsdiff/imgdiff commands, which are not useful for the case
  # here, since they contain temp filenames only.
  proc = common.Run(cmd, verbose=False)
  output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(output)

  with open(patchfile, 'rb') as f:
    return PatchInfo(imgdiff, f.read())

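# A minimal usage sketch (illustrative only; assumes the bsdiff/imgdiff host
# tools are on PATH, as common.Run() requires, and that both files exist):
#
#   patch_info = compute_patch("/tmp/src.bin", "/tmp/tgt.bin", imgdiff=False)
#   assert not patch_info.imgdiff    # a bsdiff-style patch
#   patch_bytes = patch_info.content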

class Image(object):
  def RangeSha1(self, ranges):
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    raise NotImplementedError


class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.total_blocks = 0
    self.file_map = {}
    self.hashtree_info = None

  def RangeSha1(self, ranges):
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # EmptyImage always carries empty clobbered_blocks, so
    # include_clobbered_blocks can be ignored.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")


class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)


class FileImage(Image):
  """An image wrapped around a raw image file."""

  def __init__(self, path, hashtree_info_generator=None):
    self.path = path
    self.blocksize = 4096
    self._file_size = os.path.getsize(self.path)
    self._file = open(self.path, 'rb')

    if self._file_size % self.blocksize != 0:
      raise ValueError(
          "Size of file %s must be multiple of %d bytes, but is %d" %
          (self.path, self.blocksize, self._file_size))

    self.total_blocks = self._file_size // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()

    self.generator_lock = threading.Lock()

    self.hashtree_info = None
    if hashtree_info_generator:
      self.hashtree_info = hashtree_info_generator.Generate(self)

    zero_blocks = []
    nonzero_blocks = []
    reference = b'\0' * self.blocksize  # bytes, to match binary reads

    for i in range(self.total_blocks):
      d = self._file.read(self.blocksize)
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks

    self.file_map = {}
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if self.hashtree_info:
      self.file_map["__HASHTREE"] = self.hashtree_info.hashtree_range

  def __del__(self):
    self._file.close()

  def _GetRangeData(self, ranges):
    # Use a lock to protect the generator so that we will not run two
    # instances of this generator on the same object simultaneously.
    with self.generator_lock:
      for s, e in ranges:
        self._file.seek(s * self.blocksize)
        for _ in range(s, e):
          yield self._file.read(self.blocksize)

  def RangeSha1(self, ranges):
    h = sha1()
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return list(self._GetRangeData(ranges))

  def TotalSha1(self, include_clobbered_blocks=False):
    assert not self.clobbered_blocks
    return self.RangeSha1(self.care_map)

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):  # pylint: disable=not-an-iterable
      fd.write(data)

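# Illustrative usage sketch (the path is hypothetical; the file size must be
# a multiple of the 4096-byte block size):
#
#   img = FileImage("/tmp/system.img")
#   img.TotalSha1()                    # SHA-1 over the whole care_map
#   img.ReadRangeSet(RangeSet("0-1"))  # the first two blocks, as read chunks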

class Transfer(object):
  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    self.id = len(by_id)
    by_id.append(self)

    self._patch_info = None

  @property
  def patch_info(self):
    return self._patch_info

  @patch_info.setter
  def patch_info(self, info):
    if info:
      assert self.style == "diff"
    self._patch_info = info

  def NetStashChange(self):
    return (sum(sr.size() for (_, sr) in self.stash_before) -
            sum(sr.size() for (_, sr) in self.use_stash))

  def ConvertToNew(self):
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()
    self.patch_info = None

  def __str__(self):
    return (str(self.id) + ": <" + str(self.src_ranges) + " " + self.style +
            " to " + str(self.tgt_ranges) + ">")

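# Ordering sketch (illustrative): if transfer b overwrites source blocks that
# transfer a still needs to read (a.src_ranges intersects b.tgt_ranges), then
# a must run first; b is recorded in a.goes_before and a in b.goes_after.
# When the final sequence violates such an edge, the overlapping blocks are
# carried across via matching (stash_before, use_stash) pairs instead.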

@functools.total_ordering
class HeapItem(object):
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want the
    # maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    return self.item is not None

  # Python 2 uses __nonzero__, while Python 3 uses __bool__.
  __nonzero__ = __bool__

  # The rest of the operations are generated by the functools.total_ordering
  # decorator.
  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score

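# A minimal sketch of the negated-score trick (illustrative; _Xfer stands in
# for a Transfer-like object that only needs a .score attribute):
#
#   import heapq
#   class _Xfer(object):
#     def __init__(self, score): self.score = score
#   heap = [HeapItem(_Xfer(s)) for s in (1, 5, 3)]
#   heapq.heapify(heap)
#   heapq.heappop(heap).item.score   # => 5; the highest score pops first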

class ImgdiffStats(object):
  """A class that collects imgdiff stats.

  It keeps track of the files to which imgdiff is applied while generating
  BlockImageDiff. It also logs the ones that cannot use imgdiff, with specific
  reasons. The stats are only meaningful when imgdiff is not disabled by the
  caller of BlockImageDiff. In addition, only files with supported types
  (BlockImageDiff.FileTypeSupportedByImgdiff()) are allowed to be logged.
  """

  USED_IMGDIFF = "APK files diff'd with imgdiff"
  USED_IMGDIFF_LARGE_APK = "Large APK files split and diff'd with imgdiff"

  # Reasons for not applying imgdiff on APKs.
  SKIPPED_NONMONOTONIC = "Not used imgdiff due to having non-monotonic ranges"
  SKIPPED_SHARED_BLOCKS = "Not used imgdiff due to using shared blocks"
  SKIPPED_INCOMPLETE = "Not used imgdiff due to incomplete RangeSet"

  # The list of valid reasons, which will also be the dumped order in a report.
  REASONS = (
      USED_IMGDIFF,
      USED_IMGDIFF_LARGE_APK,
      SKIPPED_NONMONOTONIC,
      SKIPPED_SHARED_BLOCKS,
      SKIPPED_INCOMPLETE,
  )

  def __init__(self):
    self.stats = {}

  def Log(self, filename, reason):
    """Logs why imgdiff can or cannot be applied to the given filename.

    Args:
      filename: The filename string.
      reason: One of the reason constants listed in REASONS.

    Raises:
      AssertionError: On unsupported filetypes or invalid reason.
    """
    assert BlockImageDiff.FileTypeSupportedByImgdiff(filename)
    assert reason in self.REASONS

    if reason not in self.stats:
      self.stats[reason] = set()
    self.stats[reason].add(filename)

  def Report(self):
    """Prints a report of the collected imgdiff stats."""

    def print_header(header, separator):
      logger.info(header)
      logger.info('%s\n', separator * len(header))

    print_header(' Imgdiff Stats Report ', '=')
    for key in self.REASONS:
      if key not in self.stats:
        continue
      values = self.stats[key]
      section_header = ' {} (count: {}) '.format(key, len(values))
      print_header(section_header, '-')
      logger.info(''.join([' {}\n'.format(name) for name in values]))

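# Illustrative usage (file names are hypothetical):
#
#   stats = ImgdiffStats()
#   stats.Log("Settings.apk", ImgdiffStats.USED_IMGDIFF)
#   stats.Log("Maps.apk", ImgdiffStats.SKIPPED_NONMONOTONIC)
#   stats.Report()   # dumps each reason bucket and its files via the logger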

class BlockImageDiff(object):
  """Generates the diff of two block image objects.

  BlockImageDiff works on two image objects. An image object is anything that
  provides the following attributes:

     blocksize: the size in bytes of a block, currently must be 4096.

     total_blocks: the total size of the partition/image, in blocks.

     care_map: a RangeSet containing which blocks (in the range [0,
       total_blocks) we actually care about; i.e. which blocks contain data.

     file_map: a dict that partitions the blocks contained in care_map into
       smaller domains that are useful for doing diffs on. (Typically a domain
       is a file, and the key in file_map is the pathname.)

     clobbered_blocks: a RangeSet containing which blocks contain data but may
       be altered by the FS. They need to be excluded when verifying the
       partition integrity.

     ReadRangeSet(): a function that takes a RangeSet and returns the data
       contained in the image blocks of that RangeSet. The data is returned as
       a list or tuple of strings; concatenating the elements together should
       produce the requested data. Implementations are free to break up the
       data into list/tuple elements in any way that is convenient.

     RangeSha1(): a function that returns (as a hex string) the SHA-1 hash of
       all the data in the specified range.

     TotalSha1(): a function that returns (as a hex string) the SHA-1 hash of
       all the data in the image (ie, all the blocks in the care_map minus
       clobbered_blocks, or including the clobbered blocks if
       include_clobbered_blocks is True).

  When creating a BlockImageDiff, the src image may be None, in which case the
  list of transfers produced will never read from the original image.
  """

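  # A minimal end-to-end sketch (illustrative; the path prefix below is
  # hypothetical). Compute() writes <prefix>.transfer.list, <prefix>.new.dat
  # and <prefix>.patch.dat:
  #
  #   diff = BlockImageDiff(tgt_image, src_image, version=4)
  #   diff.Compute("/tmp/system")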
  def __init__(self, tgt, src=None, threads=None, version=4,
               disable_imgdiff=False):
    if threads is None:
      threads = multiprocessing.cpu_count() // 2
      if threads == 0:
        threads = 1
    self.threads = threads
    self.version = version
    self.transfers = []
    self.src_basenames = {}
    self.src_numpatterns = {}
    self._max_stashed_size = 0
    self.touched_src_ranges = RangeSet()
    self.touched_src_sha1 = None
    self.disable_imgdiff = disable_imgdiff
    self.imgdiff_stats = ImgdiffStats() if not disable_imgdiff else None

    assert version in (3, 4)

    self.tgt = tgt
    if src is None:
      src = EmptyImage()
    self.src = src

    # The updater code that installs the patch always uses 4k blocks.
    assert tgt.blocksize == 4096
    assert src.blocksize == 4096

    # The range sets in each filemap should comprise a partition of
    # the care map.
    self.AssertPartition(src.care_map, src.file_map.values())
    self.AssertPartition(tgt.care_map, tgt.file_map.values())

  @property
  def max_stashed_size(self):
    return self._max_stashed_size

  @staticmethod
  def FileTypeSupportedByImgdiff(filename):
    """Returns whether the file type is supported by imgdiff."""
    return filename.lower().endswith(('.apk', '.jar', '.zip'))

  def CanUseImgdiff(self, name, tgt_ranges, src_ranges, large_apk=False):
    """Checks whether we can apply imgdiff for the given RangeSets.

    For files in ZIP format (e.g., APKs, JARs, etc.) we would like to use
    'imgdiff -z' if possible, because it usually produces significantly
    smaller patches than bsdiff.

    This is permissible if all of the following conditions hold.
      - The imgdiff hasn't been disabled by the caller (e.g. squashfs);
      - The file type is supported by imgdiff;
      - The source and target blocks are monotonic (i.e. the data is stored
        with blocks in increasing order);
      - Both files don't contain shared blocks;
      - Both files have complete lists of blocks;
      - We haven't removed any blocks from the source set.

    If all these conditions are satisfied, concatenating all the blocks in the
    RangeSet in order will produce a valid ZIP file (plus possibly extra zeros
    in the last block). imgdiff is fine with extra zeros at the end of the
    file.

    Args:
      name: The filename to be diff'd.
      tgt_ranges: The target RangeSet.
      src_ranges: The source RangeSet.
      large_apk: Whether this is to split a large APK.

    Returns:
      A boolean result.
    """
    if self.disable_imgdiff or not self.FileTypeSupportedByImgdiff(name):
      return False

    if not tgt_ranges.monotonic or not src_ranges.monotonic:
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_NONMONOTONIC)
      return False

    if (tgt_ranges.extra.get('uses_shared_blocks') or
        src_ranges.extra.get('uses_shared_blocks')):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_SHARED_BLOCKS)
      return False

    if tgt_ranges.extra.get('incomplete') or src_ranges.extra.get('incomplete'):
      self.imgdiff_stats.Log(name, ImgdiffStats.SKIPPED_INCOMPLETE)
      return False

    reason = (ImgdiffStats.USED_IMGDIFF_LARGE_APK if large_apk
              else ImgdiffStats.USED_IMGDIFF)
    self.imgdiff_stats.Log(name, reason)
    return True

  def Compute(self, prefix):
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    self.FindSequenceForTransfers()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      stash_limit = (common.OPTIONS.cache_size *
                     common.OPTIONS.stash_threshold / self.tgt.blocksize)
      # Ignore the stash limit and calculate the maximum simultaneously stashed
      # blocks needed.
      _, max_stashed_blocks = self.ReviseStashSize(ignore_stash_limit=True)

      # We cannot stash more blocks than the stash limit simultaneously. As a
      # result, some 'diff' commands will be converted to new, leading to an
      # unintended large package. To mitigate this issue, we can carefully
      # choose the transfers for conversion. The number '1024' can be further
      # tweaked here to balance the package size and build time.
      if max_stashed_blocks > stash_limit + 1024:
        self.SelectAndConvertDiffTransfersToNew(
            max_stashed_blocks - stash_limit)
        # Regenerate the sequence as the graph has changed.
        self.FindSequenceForTransfers()

      # Revise the stash size again to keep the size under limit.
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()
    self.AssertSha1Good()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)

    # Report the imgdiff stats.
    if not self.disable_imgdiff:
      self.imgdiff_stats.Report()

  def WriteTransfers(self, prefix):
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large, and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str_buffer = [str(size)]

      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str_buffer.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str_buffer.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str_buffer.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str_buffer.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str_buffer)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    if self.tgt.hashtree_info:
      out.append("compute_hash_tree {} {} {} {} {}\n".format(
          self.tgt.hashtree_info.hashtree_range.to_string_raw(),
          self.tgt.hashtree_info.filesystem_range.to_string_raw(),
          self.tgt.hashtree_info.hash_algorithm,
          self.tgt.hashtree_info.salt,
          self.tgt.hashtree_info.root_hash))

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starved for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "w") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: %d bytes (%.2f%%)\n",
          max_stashed_blocks, self._max_stashed_size, max_allowed,
          self._max_stashed_size * 100.0 / max_allowed)
    else:
      logger.info(
          "max stashed blocks: %d (%d bytes), limit: <unknown>\n",
          max_stashed_blocks, self._max_stashed_size)

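  # For reference, a tiny transfer.list produced by WriteTransfers() could
  # look like this (contents illustrative, not from a real build; raw
  # RangeSet strings are "<count>,<start>,<end>,..."):
  #
  #   4            <- transfer list format version
  #   8            <- total blocks written
  #   0            <- stash slots needed (unused in v3+)
  #   0            <- maximum blocks stashed simultaneously
  #   zero 2,0,1
  #   new 2,1,8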
  def ReviseStashSize(self, ignore_stash_limit=False):
    """Revises the transfers to keep the stash size within the size limit.

    Iterates through the transfer list and calculates the stash size each
    transfer generates. Converts the affected transfers to new if we reach the
    stash limit.

    Args:
      ignore_stash_limit: Ignores the stash limit and calculates the max
        simultaneous stashed blocks instead. No change will be made to the
        transfer list with this flag.

    Returns:
      A tuple of (tgt blocks converted to new, max stashed blocks)
    """
    logger.info("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    max_allowed_blocks = None
    if not ignore_stash_limit:
      # Compute the maximum blocks available for stash based on /cache size and
      # the threshold.
      cache_size = common.OPTIONS.cache_size
      stash_threshold = common.OPTIONS.stash_threshold
      max_allowed_blocks = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0
    max_stashed_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires more stash than is available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          stashed_blocks_after += sr.size()

        if max_allowed_blocks and stashed_blocks_after > max_allowed_blocks:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          logger.info("%10d %9s %s", sr.size(), "explicit", use_cmd)
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after
          max_stashed_blocks = max(max_stashed_blocks, stashed_blocks)

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if (max_allowed_blocks and
              stashed_blocks + xf.src_ranges.size() > max_allowed_blocks):
            replaced_cmds.append(xf)
            logger.info("%10d %9s %s", xf.src_ranges.size(), "implicit", xf)
          else:
            # The whole source ranges will be stashed for implicit stashes.
            max_stashed_blocks = max(max_stashed_blocks,
                                     stashed_blocks + xf.src_ranges.size())

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violate the space limit and print the total
        # number to screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    logger.info(
        "  Total %d blocks (%d bytes) are packed as new blocks due to "
        "insufficient cache size. Maximum blocks stashed simultaneously: %d",
        new_blocks, num_of_bytes, max_stashed_blocks)
    return new_blocks, max_stashed_blocks

  def ComputePatches(self, prefix):
    logger.info("Reticulating splines...")
    diff_queue = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for index, xf in enumerate(self.transfers):
        if xf.style == "zero":
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "new":
          self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
          tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
          logger.info(
              "%10d %10d (%6.2f%%) %7s %s %s", tgt_size, tgt_size, 100.0,
              xf.style, xf.tgt_name, str(xf.tgt_ranges))

        elif xf.style == "diff":
          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          #    ["he", "llo"]  vs  ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).
          if xf.src_sha1 == xf.tgt_sha1:
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
            xf.patch_info = None
            tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
            if xf.src_ranges != xf.tgt_ranges:
              logger.info(
                  "%10d %10d (%6.2f%%) %7s %s %s (from %s)", tgt_size,
                  tgt_size, 100.0, xf.style,
                  xf.tgt_name if xf.tgt_name == xf.src_name else (
                      xf.tgt_name + " (from " + xf.src_name + ")"),
                  str(xf.tgt_ranges), str(xf.src_ranges))
          else:
            if xf.patch_info:
              # We have already generated the patch (e.g. during split of
              # large APKs or reduction of stash size)
              imgdiff = xf.patch_info.imgdiff
            else:
              imgdiff = self.CanUseImgdiff(
                  xf.tgt_name, xf.tgt_ranges, xf.src_ranges)
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_queue.append((index, imgdiff, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    patches = self.ComputePatchesForInputList(diff_queue, False)

    offset = 0
    with open(prefix + ".patch.dat", "wb") as patch_fd:
      for index, patch_info, _ in patches:
        xf = self.transfers[index]
        xf.patch_len = len(patch_info.content)
        xf.patch_start = offset
        offset += xf.patch_len
        patch_fd.write(patch_info.content)

        tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
        logger.info(
            "%10d %10d (%6.2f%%) %7s %s %s %s", xf.patch_len, tgt_size,
            xf.patch_len * 100.0 / tgt_size, xf.style,
            xf.tgt_name if xf.tgt_name == xf.src_name else (
                xf.tgt_name + " (from " + xf.src_name + ")"),
            xf.tgt_ranges, xf.src_ranges)

  def AssertSha1Good(self):
    """Check the SHA-1 of the src & tgt blocks in the transfer list.

    Double check the SHA-1 value to avoid the issue in b/71908713, where
    SparseImage.RangeSha1() messed up with the hash calculation in multi-thread
    environment. That specific problem has been fixed by protecting the
    underlying generator function 'SparseImage._GetRangeData()' with lock.
    """
    for xf in self.transfers:
      tgt_sha1 = self.tgt.RangeSha1(xf.tgt_ranges)
      assert xf.tgt_sha1 == tgt_sha1
      if xf.style == "diff":
        src_sha1 = self.src.RangeSha1(xf.src_ranges)
        assert xf.src_sha1 == src_sha1

  def AssertSequenceGood(self):
    # Simulate the sequences of transfers we will output, and check that:
    # - we never read a block after writing it, and
    # - we write every block we care about exactly once.

    # Start with no blocks having been touched yet.
    touched = array.array("B", b"\0" * self.tgt.total_blocks)

    # Imagine processing the transfers in order.
    for xf in self.transfers:
      # Check that the input blocks for this transfer haven't yet been touched.

      x = xf.src_ranges
      for _, sr in xf.use_stash:
        x = x.subtract(sr)

      for s, e in x:
        # Source image could be larger. Don't check the blocks that are in the
        # source image only, since they are not in 'touched' and won't ever
        # be touched.
        for i in range(s, min(e, self.tgt.total_blocks)):
          assert touched[i] == 0

      # Check that the output blocks for this transfer haven't yet
      # been touched, and touch all the blocks written by this
      # transfer.
      for s, e in xf.tgt_ranges:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    if self.tgt.hashtree_info:
      for s, e in self.tgt.hashtree_info.hashtree_range:
        for i in range(s, e):
          assert touched[i] == 0
          touched[i] = 1

    # Check that we've written every target block.
    for s, e in self.tgt.care_map:
      for i in range(s, e):
        assert touched[i] == 1

  def FindSequenceForTransfers(self):
    """Finds a sequence for the given transfers.

    The goal is to minimize the violation of order dependencies between these
    transfers, so that fewer blocks are stashed when applying the update.
    """

    # Clear the existing dependency between transfers
    for xf in self.transfers:
      xf.goes_before = OrderedDict()
      xf.goes_after = OrderedDict()

      xf.stash_before = []
      xf.use_stash = []

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

  def ImproveVertexSequence(self):
    logger.info("Improving vertex order...")

    # At this point our digraph is acyclic; we reversed any edges that
    # were backwards in the heuristically-generated sequence. The
    # previously-generated order is still acceptable, but we hope to
    # find a better order that needs less memory for stashed data.
    # Now we do a topological sort to generate a new vertex order,
    # using a greedy algorithm to choose which vertex goes next
    # whenever we have a choice.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()

    L = []  # the new vertex order

    # S is the set of sources in the remaining graph; we always choose
    # the one that leaves the least amount of stashed data after it's
    # executed.
    S = [(u.NetStashChange(), u.order, u) for u in self.transfers
         if not u.incoming]
    heapq.heapify(S)

    while S:
      _, _, xf = heapq.heappop(S)
      L.append(xf)
      for u in xf.outgoing:
        del u.incoming[xf]
        if not u.incoming:
          heapq.heappush(S, (u.NetStashChange(), u.order, u))

    # if this fails then our graph had a cycle.
    assert len(L) == len(self.transfers)

    self.transfers = L
    for i, xf in enumerate(L):
      xf.order = i

  def ReverseBackwardEdges(self):
    """Reverse unsatisfying edges and compute pairs of stashed blocks.

    For each transfer, make sure it properly stashes the blocks it touches and
    will be used by later transfers. It uses pairs of (stash_raw_id, range) to
    record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
    identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
    it is possible to have multiple pairs with different 'stash_raw_id's. Each
    'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
    blocks will be written to the same stash slot in WriteTransfers().
    """

    logger.info("Reversing backward edges...")
    in_order = 0
    out_of_order = 0
    stash_raw_id = 0
    stash_size = 0

    for xf in self.transfers:
      for u in xf.goes_before.copy():
        # xf should go before u
        if xf.order < u.order:
          # it does, hurray!
          in_order += 1
        else:
          # it doesn't, boo. modify u to stash the blocks that it
          # writes that xf wants to read, and then require u to go
          # before xf.
          out_of_order += 1

          overlap = xf.src_ranges.intersect(u.tgt_ranges)
          assert overlap

          u.stash_before.append((stash_raw_id, overlap))
          xf.use_stash.append((stash_raw_id, overlap))
          stash_raw_id += 1
          stash_size += overlap.size()

          # reverse the edge direction; now xf must go after u
          del xf.goes_before[u]
          del u.goes_after[xf]
          xf.goes_after[u] = None  # value doesn't matter
          u.goes_before[xf] = None

    logger.info(
        "  %d/%d dependencies (%.2f%%) were violated; %d source blocks "
        "stashed.", out_of_order, in_order + out_of_order,
        (out_of_order * 100.0 / (in_order + out_of_order)) if (
            in_order + out_of_order) else 0.0,
        stash_size)

  def FindVertexSequence(self):
    logger.info("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.
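    # Illustrative sketch (hypothetical graph): for a cycle A -> B -> C -> A
    # with edge weights 10, 10 and 1, laying the vertices out as A, B, C
    # leaves only the weight-1 edge C -> A pointing left; the heuristic aims
    # for such a layout, so ReverseBackwardEdges() later stashes 1 block
    # instead of 10.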

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    # Use OrderedDict() instead of set() to preserve the insertion order. Need
    # to use 'sinks[key] = None' to add key into the set. sinks will look like
    # { key1: None, key2: None, ... }.
    sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
    sources = OrderedDict.fromkeys(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = OrderedDict()
        for u in sinks:
          if u not in G:
            continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing:
              new_sinks[iu] = None
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = OrderedDict()
        for u in sources:
          if u not in G:
            continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming:
              new_sources[iu] = None
        sources = new_sources

      if not G:
        break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net number of source blocks we save by treating
      # it as a source rather than a sink.

      while True:
        u = heapq.heappop(heap)
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming:
          sources[iu] = None

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing:
          sinks[iu] = None

    # Now record the sequence in the 'order' field of each transfer,
    # and rearrange self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers

  def GenerateDigraph(self):
    logger.info("Generating digraph...")

    # Each item of source_ranges will be:
    #   - None, if that block is not used as a source,
    #   - an ordered set of transfers.
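    # Illustrative sketch (hypothetical transfers): if t1 reads blocks 0-1
    # and t2 reads blocks 1-2, source_ranges ends up as
    #   [{t1: None}, {t1: None, t2: None}, {t2: None}].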
    source_ranges = []
    for b in self.transfers:
      for s, e in b.src_ranges:
        if e > len(source_ranges):
          source_ranges.extend([None] * (e - len(source_ranges)))
        for i in range(s, e):
          if source_ranges[i] is None:
            source_ranges[i] = OrderedDict.fromkeys([b])
          else:
            source_ranges[i][b] = None

    for a in self.transfers:
      intersections = OrderedDict()
      for s, e in a.tgt_ranges:
        for i in range(s, e):
          if i >= len(source_ranges):
            break
          # Add all the Transfers in source_ranges[i] to the (ordered) set.
          if source_ranges[i] is not None:
            for j in source_ranges[i]:
              intersections[j] = None

      for b in intersections:
        if a is b:
          continue

        # If the blocks written by A are read by B, then B needs to go
        # before A.
        i = a.tgt_ranges.intersect(b.src_ranges)
        if i:
          if b.src_name == "__ZERO":
            # the cost of removing source blocks for the __ZERO domain
            # is (nearly) zero.
            size = 0
          else:
            size = i.size()
          b.goes_before[a] = size
          a.goes_after[b] = size

  def ComputePatchesForInputList(self, diff_queue, compress_target):
    """Returns a list of patch information for the input list of transfers.

    Args:
      diff_queue: a list of (transfer order, imgdiff flag, patch index)
          tuples, one per transfer with style 'diff'.
      compress_target: If True, compresses the target ranges of each
          transfer and saves the compressed size.

    Returns:
      A list of (transfer order, patch_info, compressed_size) tuples.
    """
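    # Illustrative usage (hypothetical values): an entry (42, True, 7) in
    # diff_queue requests an imgdiff patch for self.transfers[42]; the
    # resulting tuple is stored at patches[7].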

    if not diff_queue:
      return []

    if self.threads > 1:
      logger.info("Computing patches (using %d threads)...", self.threads)
    else:
      logger.info("Computing patches...")

    diff_total = len(diff_queue)
    patches = [None] * diff_total
    error_messages = []

    # Using multiprocessing doesn't give additional benefits, due to the
    # pattern of the code. The diffing work is done by subprocess.call, which
    # already runs in a separate process (not affected much by the GIL -
    # Global Interpreter Lock). Using multiprocessing also requires either a)
    # writing the diff input files in the main process before forking, or b)
    # reopening the image file (SparseImage) in the worker processes. Doing
    # neither of them further improves the performance.
    lock = threading.Lock()

    def diff_worker():
      while True:
        with lock:
          if not diff_queue:
            return
          xf_index, imgdiff, patch_index = diff_queue.pop()
          xf = self.transfers[xf_index]

        message = []
        compressed_size = None

        patch_info = xf.patch_info
        if not patch_info:
          src_file = common.MakeTempFile(prefix="src-")
          with open(src_file, "wb") as fd:
            self.src.WriteRangeDataToFd(xf.src_ranges, fd)

          tgt_file = common.MakeTempFile(prefix="tgt-")
          with open(tgt_file, "wb") as fd:
            self.tgt.WriteRangeDataToFd(xf.tgt_ranges, fd)

          try:
            patch_info = compute_patch(src_file, tgt_file, imgdiff)
          except ValueError as e:
            message.append(
                "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
                    "imgdiff" if imgdiff else "bsdiff",
                    xf.tgt_name if xf.tgt_name == xf.src_name else
                    xf.tgt_name + " (from " + xf.src_name + ")",
                    xf.tgt_ranges, xf.src_ranges, e))

        if compress_target:
          tgt_data = self.tgt.ReadRangeSet(xf.tgt_ranges)
          try:
            # Compresses with the default level (6).
            compress_obj = zlib.compressobj(6, zlib.DEFLATED, -zlib.MAX_WBITS)
            compressed_data = (compress_obj.compress("".join(tgt_data))
                               + compress_obj.flush())
            compressed_size = len(compressed_data)
          except zlib.error as e:
            message.append(
                "Failed to compress the data in target range {} for {}:\n"
                "{}".format(xf.tgt_ranges, xf.tgt_name, e))

        if message:
          with lock:
            error_messages.extend(message)

        with lock:
          patches[patch_index] = (xf_index, patch_info, compressed_size)

    threads = [threading.Thread(target=diff_worker)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    if error_messages:
      logger.error('ERROR:')
      logger.error('\n'.join(error_messages))
      logger.error('\n\n\n')
      sys.exit(1)

    return patches

  def SelectAndConvertDiffTransfersToNew(self, violated_stash_blocks):
    """Converts diff transfers to new to reduce the max simultaneous stash.

    Since the 'new' data is compressed with deflate, we can select the 'diff'
    transfers for conversion by comparing each transfer's patch size with the
    size of its compressed target data. Ideally, we want to convert the
    transfers that use a large number of stashed blocks but incur only a
    small size increase.
    """
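    # Illustrative sketch (hypothetical sizes): a transfer with an
    # 800000-byte patch, a 1000000-byte compressed target and 50 stashed
    # blocks scores (1000000 - 800000) * 100.0 / 50 = 400000.0, and is
    # converted before any candidate with a higher per-block cost.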
    TransferSizeScore = namedtuple("TransferSizeScore",
                                   "xf, used_stash_blocks, score")

    logger.info("Selecting diff commands to convert to new.")
    diff_queue = []
    for xf in self.transfers:
      if xf.style == "diff" and xf.src_sha1 != xf.tgt_sha1:
        use_imgdiff = self.CanUseImgdiff(xf.tgt_name, xf.tgt_ranges,
                                         xf.src_ranges)
        diff_queue.append((xf.order, use_imgdiff, len(diff_queue)))

    # The 'move' transfers (identical source and target hashes) were filtered
    # out above; compute the patch & compressed size for the remaining ones.
    result = self.ComputePatchesForInputList(diff_queue, True)

    conversion_candidates = []
    for xf_index, patch_info, compressed_size in result:
      xf = self.transfers[xf_index]
      if not xf.patch_info:
        xf.patch_info = patch_info

      size_ratio = len(xf.patch_info.content) * 100.0 / compressed_size
      diff_style = "imgdiff" if xf.patch_info.imgdiff else "bsdiff"
      logger.info("%s, target size: %d blocks, style: %s, patch size: %d,"
                  " compression_size: %d, ratio %.2f%%", xf.tgt_name,
                  xf.tgt_ranges.size(), diff_style,
                  len(xf.patch_info.content), compressed_size, size_ratio)

      used_stash_blocks = sum(sr.size() for _, sr in xf.use_stash)
      # Convert the transfer to new if the compressed size is smaller or
      # equal. We don't need to maintain the stash_before lists here because
      # the graph will be regenerated later.
      if len(xf.patch_info.content) >= compressed_size:
        # Add the transfer to the candidate list with a negative score, so
        # that it is always converted.
        conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
                                                       -1))
      elif used_stash_blocks > 0:
        # This heuristic represents the size increase in the final package
        # per unit of stashed data removed.
        score = ((compressed_size - len(xf.patch_info.content)) * 100.0
                 / used_stash_blocks)
        conversion_candidates.append(TransferSizeScore(xf, used_stash_blocks,
                                                       score))
    # Transfers with a lower score (i.e. less expensive to convert) will be
    # converted first.
    conversion_candidates.sort(key=lambda x: x.score)

    # TODO(xunchang) Improve the logic to find the transfers to convert, e.g.
    # convert the ones that contribute to the max stash, run ReviseStashSize
    # multiple times etc.
    removed_stashed_blocks = 0
    for xf, used_stash_blocks, _ in conversion_candidates:
      logger.info("Converting %s to new", xf.tgt_name)
      xf.ConvertToNew()
      removed_stashed_blocks += used_stash_blocks
      # Experiments show that we will get a smaller package size if we remove
      # slightly more stashed blocks than the violated stash blocks.
      if removed_stashed_blocks >= violated_stash_blocks:
        break

    logger.info("Removed %d stashed blocks", removed_stashed_blocks)

  def FindTransfers(self):
    """Parse the file_map to generate all the transfers."""

    def AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                             src_ranges, style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for the resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition, source blocks may be too large to be stashed. If a file
      occupies too many blocks, we split it into smaller pieces by creating
      multiple Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""
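      # Illustrative sketch (hypothetical numbers): with a 256 MiB cache and
      # 4096-byte blocks, max_blocks_per_transfer is
      # int(268435456 * 0.125 / 4096) = 8192, so a 20000-block "foo" becomes
      # "foo-0" and "foo-1" (8192 blocks each) plus "foo-2" (3616 blocks).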

      pieces = 0
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges),
                 self.src.RangeSha1(src_ranges), style, by_id)

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges, style,
                          by_id):
      """Find all the zip files and split the others by a fixed chunk size.

      This function will construct a list of zip archives, which will later
      be split by imgdiff to reduce the final patch size. The other files
      are plainly split based on a fixed chunk size, with a potential patch
      size penalty.
      """

      assert style == "diff"

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges),
                 self.src.RangeSha1(src_ranges), style, by_id)
        return

      # Split large APKs with imgdiff, if possible. We're intentionally
      # checking file types one more time (CanUseImgdiff() checks that as
      # well), before calling the costly RangeSha1()s.
      if (self.FileTypeSupportedByImgdiff(tgt_name) and
          self.tgt.RangeSha1(tgt_ranges) != self.src.RangeSha1(src_ranges)):
        if self.CanUseImgdiff(tgt_name, tgt_ranges, src_ranges, True):
          large_apks.append((tgt_name, src_name, tgt_ranges, src_ranges))
          return

      AddSplitTransfersWithFixedSizeChunks(tgt_name, src_name, tgt_ranges,
                                           src_ranges, style, by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges),
                 self.src.RangeSha1(src_ranges), style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only a few changes (e.g. the
      # header), we will patch the changed blocks only. This avoids stashing
      # unchanged blocks while patching. We limit the analysis to files whose
      # size stays the same, to avoid paying too much extra cost at OTA
      # generation time.
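      # Illustrative sketch (hypothetical file): for a 1000-block .odex where
      # only 16 header blocks changed, the scan below skips the 984 identical
      # blocks, emitting "-skipped" transfers for them and "-cropped"
      # transfers for the 16 blocks that actually need diffing and stashing.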
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # The 0.5 threshold can be further tuned. The tradeoff is: if only
        # very few blocks remain identical, we lose the opportunity to use
        # imgdiff, which may have a better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if there is no clear sign of benefit.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          logger.info(
              '%10d %10d (%6.2f%%) %s', tgt_skipped.size(), tgt_size,
              tgt_skipped.size() * 100.0 / tgt_size, tgt_name)
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd
          # as the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    def ParseAndValidateSplitInfo(patch_size, tgt_ranges, src_ranges,
                                  split_info):
      """Parse the split_info and return a list of info tuples.

      Args:
        patch_size: total size of the patch file.
        tgt_ranges: Ranges of the target file within the original image.
        src_ranges: Ranges of the source file within the original image.
        split_info format:
          imgdiff version#
          count of pieces
          <patch_size_1> <tgt_size_1> <src_ranges_1>
          ...
          <patch_size_n> <tgt_size_n> <src_ranges_n>

      Returns:
        A list of (patch_start, patch_len, split_tgt_ranges,
        split_src_ranges) tuples.
      """
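      # Illustrative sketch (hypothetical split_info): ["2", "2",
      # "524288 4194304 <raw ranges>", "131072 2097152 <raw ranges>"]
      # describes two pieces: a 512 KiB patch producing the first 1024
      # target blocks, then a 128 KiB patch starting at offset 524288 in
      # the patch file.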

      version = int(split_info[0])
      assert version == 2
      count = int(split_info[1])
      assert len(split_info) - 2 == count

      split_info_list = []
      patch_start = 0
      tgt_remain = copy.deepcopy(tgt_ranges)
      # Each line has the format <patch_size>, <tgt_size>, <src_ranges>.
      for line in split_info[2:]:
        info = line.split()
        assert len(info) == 3
        patch_length = int(info[0])

        split_tgt_size = int(info[1])
        assert split_tgt_size % 4096 == 0
        assert split_tgt_size // 4096 <= tgt_remain.size()
        split_tgt_ranges = tgt_remain.first(split_tgt_size // 4096)
        tgt_remain = tgt_remain.subtract(split_tgt_ranges)

        # Find the split_src_ranges within the image file from its relative
        # position in file.
        split_src_indices = RangeSet.parse_raw(info[2])
        split_src_ranges = RangeSet()
        for r in split_src_indices:
          curr_range = src_ranges.first(r[1]).subtract(src_ranges.first(r[0]))
          assert not split_src_ranges.overlaps(curr_range)
          split_src_ranges = split_src_ranges.union(curr_range)

        split_info_list.append((patch_start, patch_length,
                                split_tgt_ranges, split_src_ranges))
        patch_start += patch_length

      # Check that the sizes of all the split pieces add up to the final file
      # size for patch and target.
      assert tgt_remain.size() == 0
      assert patch_start == patch_size
      return split_info_list

    def SplitLargeApks():
      """Split the large apk files.

      Example: Chrome.apk will be split into
        src-0: Chrome.apk-0, tgt-0: Chrome.apk-0
        src-1: Chrome.apk-1, tgt-1: Chrome.apk-1
        ...

      After the split, the target pieces are continuous and block aligned; and
      the source pieces are mutually exclusive. During the split, we also
      generate and save the image patch between src-X & tgt-X. This patch will
      be valid because the block ranges of src-X & tgt-X will always stay the
      same afterwards; but there's a chance we don't use the patch if we
      convert the "diff" command into "new" or "move" later.
      """

      while True:
        with transfer_lock:
          if not large_apks:
            return
          tgt_name, src_name, tgt_ranges, src_ranges = large_apks.pop(0)

        src_file = common.MakeTempFile(prefix="src-")
        tgt_file = common.MakeTempFile(prefix="tgt-")
        with open(src_file, "wb") as src_fd:
          self.src.WriteRangeDataToFd(src_ranges, src_fd)
        with open(tgt_file, "wb") as tgt_fd:
          self.tgt.WriteRangeDataToFd(tgt_ranges, tgt_fd)

        patch_file = common.MakeTempFile(prefix="patch-")
        patch_info_file = common.MakeTempFile(prefix="split_info-")
        cmd = ["imgdiff", "-z",
               "--block-limit={}".format(max_blocks_per_transfer),
               "--split-info=" + patch_info_file,
               src_file, tgt_file, patch_file]
        proc = common.Run(cmd)
        imgdiff_output, _ = proc.communicate()
        assert proc.returncode == 0, \
            "Failed to create imgdiff patch between {} and {}:\n{}".format(
                src_name, tgt_name, imgdiff_output)

        with open(patch_info_file) as patch_info:
          lines = patch_info.readlines()

        patch_size_total = os.path.getsize(patch_file)
        split_info_list = ParseAndValidateSplitInfo(patch_size_total,
                                                    tgt_ranges, src_ranges,
                                                    lines)
        for index, (patch_start, patch_length, split_tgt_ranges,
                    split_src_ranges) in enumerate(split_info_list):
          with open(patch_file, 'rb') as f:
            f.seek(patch_start)
            patch_content = f.read(patch_length)

          split_src_name = "{}-{}".format(src_name, index)
          split_tgt_name = "{}-{}".format(tgt_name, index)
          split_large_apks.append((split_tgt_name,
                                   split_src_name,
                                   split_tgt_ranges,
                                   split_src_ranges,
                                   patch_content))

    logger.info("Finding transfers...")

    large_apks = []
    split_large_apks = []
    cache_size = common.OPTIONS.cache_size
    split_threshold = 0.125
    max_blocks_per_transfer = int(cache_size * split_threshold /
                                  self.tgt.blocksize)
    empty = RangeSet()
    for tgt_fn, tgt_ranges in sorted(self.tgt.file_map.items()):
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros. We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn == "__HASHTREE":
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, True)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#"). (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)

    transfer_lock = threading.Lock()
    threads = [threading.Thread(target=SplitLargeApks)
               for _ in range(self.threads)]
    for th in threads:
      th.start()
    while threads:
      threads.pop().join()

    # Sort the split transfers for large apks to generate a deterministic
    # package.
    split_large_apks.sort()
    for (tgt_name, src_name, tgt_ranges, src_ranges,
         patch) in split_large_apks:
      transfer_split = Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                                self.tgt.RangeSha1(tgt_ranges),
                                self.src.RangeSha1(src_ranges),
                                "diff", self.transfers)
      transfer_split.patch_info = PatchInfo(True, patch)

  def AbbreviateSourceNames(self):
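    # Illustrative sketch (hypothetical name): "lib/libfoo.so.1.2" is indexed
    # under basename "libfoo.so.1.2" and number pattern "libfoo.so.#.#", so
    # FindTransfers() can still pick it as a diff source for a renamed
    # "lib64/libfoo.so.1.3".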
    for k in self.src.file_map.keys():
      b = os.path.basename(k)
      self.src_basenames[b] = k
      b = re.sub("[0-9]+", "#", b)
      self.src_numpatterns[b] = k

  @staticmethod
  def AssertPartition(total, seq):
    """Assert that all the RangeSets in 'seq' form a partition of the
    'total' RangeSet (ie, they are nonintersecting and their union
    equals 'total')."""
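    # Illustrative usage: AssertPartition(RangeSet("0-9"),
    # [RangeSet("0-3"), RangeSet("4-9")]) passes; overlapping or missing
    # blocks would trip one of the assertions below.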

    so_far = RangeSet()
    for i in seq:
      assert not so_far.overlaps(i)
      so_far = so_far.union(i)
    assert so_far == total