blob: c95512d273ff1fc1af5290b6b98579f559bddfe7 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
Doug Zongker6ab2a502016-02-09 08:28:09 -080017import array
Tao Bao8dcf7382015-05-21 14:09:49 -070018import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import functools
Doug Zongker62338182014-09-08 08:29:55 -070020import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070021import itertools
22import multiprocessing
23import os
Tao Bao3a2e3502016-12-28 09:14:39 -080024import os.path
Doug Zongkerfc44a512014-08-26 13:10:25 -070025import re
26import subprocess
Tao Bao183e56e2017-03-05 17:05:09 -080027import sys
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongkerfc44a512014-08-26 13:10:25 -070029
Tao Bao3a2e3502016-12-28 09:14:39 -080030from collections import deque, OrderedDict
31from hashlib import sha1
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(srcfile, tgtfile, imgdiff=False):
  """Generate a binary patch that turns srcfile into tgtfile.

  Runs imgdiff (zip-aware mode) when imgdiff=True, otherwise bsdiff, and
  returns the raw patch bytes. Raises ValueError carrying the tool's
  combined stdout/stderr on a non-zero exit.
  """
  patchfile = common.MakeTempFile(prefix='patch-')

  if imgdiff:
    cmd = ['imgdiff', '-z']
  else:
    cmd = ['bsdiff']
  cmd += [srcfile, tgtfile, patchfile]

  # The command lines only contain temp filenames, so dumping them is not
  # useful for debugging; run quietly.
  proc = common.Run(cmd, verbose=False, stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
  diff_output, _ = proc.communicate()

  if proc.returncode != 0:
    raise ValueError(diff_output)

  with open(patchfile, 'rb') as patch_fp:
    return patch_fp.read()
Doug Zongkerfc44a512014-08-26 13:10:25 -070055
Dan Albert8b72aef2015-03-23 19:13:21 -070056
class Image(object):
  """Abstract interface over a partition image used by BlockImageDiff."""

  def RangeSha1(self, ranges):
    """Return the hex SHA-1 of the data in the given RangeSet."""
    raise NotImplementedError

  def ReadRangeSet(self, ranges):
    """Return the data in the given RangeSet as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 over the whole image's cared-for data."""
    raise NotImplementedError

  def WriteRangeDataToFd(self, ranges, fd):
    """Write the data in the given RangeSet to the file object fd."""
    raise NotImplementedError
69
Dan Albert8b72aef2015-03-23 19:13:21 -070070
class EmptyImage(Image):
  """A zero-length image."""

  def __init__(self):
    self.blocksize = 4096
    self.total_blocks = 0
    # Nothing to care about, nothing clobbered, nothing extended.
    self.care_map = RangeSet()
    self.clobbered_blocks = RangeSet()
    self.extended = RangeSet()
    self.file_map = {}

  def RangeSha1(self, ranges):
    # There is no data, so every range hashes to the empty digest.
    return sha1().hexdigest()

  def ReadRangeSet(self, ranges):
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    # clobbered_blocks is always empty here, so the flag is irrelevant.
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    raise ValueError("Can't write data from EmptyImage to file")
96
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070097
class DataImage(Image):
  """An image wrapped around a single string of data.

  The data must be a whole number of 4096-byte blocks, unless trim=True
  (drop the partial last block) or pad=True (zero-fill the partial last
  block) is given.
  """

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    # trim and pad are mutually exclusive ways to handle a partial block.
    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so the block count stays an int under Python 3 too.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Bug fix: store a RangeSet rather than the raw list. TotalSha1() below
    # passes this to RangeSet.subtract(), and EmptyImage exposes a RangeSet
    # for the same attribute; a plain list broke the padded case.
    self.clobbered_blocks = (
        RangeSet(data=clobbered_blocks) if clobbered_blocks else RangeSet())
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block as all-zero or not. The padded last block, if any,
    # is excluded here; it is covered by the "__COPY" entry instead.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def _GetRangeData(self, ranges):
    """Yield the data chunk for each [start, end) block range in ranges."""
    for s, e in ranges:
      yield self.data[s*self.blocksize:e*self.blocksize]

  def RangeSha1(self, ranges):
    """Return the hex SHA-1 over the data in the given ranges."""
    h = sha1()
    for data in self._GetRangeData(ranges):
      h.update(data)
    return h.hexdigest()

  def ReadRangeSet(self, ranges):
    return [self._GetRangeData(ranges)]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 over the image, optionally including the
    clobbered (padded) blocks."""
    if not include_clobbered_blocks:
      return self.RangeSha1(self.care_map.subtract(self.clobbered_blocks))
    else:
      return sha1(self.data).hexdigest()

  def WriteRangeDataToFd(self, ranges, fd):
    for data in self._GetRangeData(ranges):
      fd.write(data)
181
Doug Zongkerfc44a512014-08-26 13:10:25 -0700182
class Transfer(object):
  """One unit of work: produce tgt_ranges of the target image, possibly by
  reading src_ranges of the source image."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, tgt_sha1,
               src_sha1, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.tgt_sha1 = tgt_sha1
    self.src_sha1 = src_sha1
    self.style = style

    # "Intact" means both range sets are monotonically increasing.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # OrderedDict keeps the output repeatable; with a plain dict it would
    # depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    # (raw_id, RangeSet) pairs of stashes this command creates / consumes.
    self.stash_before = []
    self.use_stash = []

    # Register in the shared by_id list; position doubles as the id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return the net change in stashed blocks caused by this command."""
    stashed = sum(sr.size() for (_, sr) in self.stash_before)
    freed = sum(sr.size() for (_, sr) in self.use_stash)
    return stashed - freed

  def ConvertToNew(self):
    """Rewrite this command as a "new" that reads nothing from the source."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (self.id, self.src_ranges, self.style,
                                  self.tgt_ranges)
220
221
@functools.total_ordering
class HeapItem(object):
  """Adapter placing scored items on heapq's min-heap, popping max first.

  The comparison key is the negated score because heapq is a min-heap while
  callers want the maximum-score item. An entry can be invalidated in place
  with clear(); a cleared HeapItem is falsy, so stale heap entries can be
  detected and skipped instead of being removed from the middle of the heap.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    self.item = None

  def __bool__(self):
    # Bug fix: this previously returned "self.item is None", which made live
    # items falsy and cleared items truthy -- exactly inverted.
    return self.item is not None

  # Python 2 consults __nonzero__ (not __bool__) for truth testing.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
237
238
Doug Zongkerfc44a512014-08-26 13:10:25 -0700239# BlockImageDiff works on two image objects. An image object is
240# anything that provides the following attributes:
241#
242# blocksize: the size in bytes of a block, currently must be 4096.
243#
244# total_blocks: the total size of the partition/image, in blocks.
245#
246# care_map: a RangeSet containing which blocks (in the range [0,
247# total_blocks) we actually care about; i.e. which blocks contain
248# data.
249#
250# file_map: a dict that partitions the blocks contained in care_map
251# into smaller domains that are useful for doing diffs on.
252# (Typically a domain is a file, and the key in file_map is the
253# pathname.)
254#
Tao Baoff777812015-05-12 11:42:31 -0700255# clobbered_blocks: a RangeSet containing which blocks contain data
256# but may be altered by the FS. They need to be excluded when
257# verifying the partition integrity.
258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# ReadRangeSet(): a function that takes a RangeSet and returns the
260# data contained in the image blocks of that RangeSet. The data
261# is returned as a list or tuple of strings; concatenating the
262# elements together should produce the requested data.
263# Implementations are free to break up the data into list/tuple
264# elements in any way that is convenient.
265#
Tao Bao183e56e2017-03-05 17:05:09 -0800266# RangeSha1(): a function that returns (as a hex string) the SHA-1
267# hash of all the data in the specified range.
268#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700269# TotalSha1(): a function that returns (as a hex string) the SHA-1
270# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700271# care_map minus clobbered_blocks, or including the clobbered
272# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700273#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700274# When creating a BlockImageDiff, the src image may be None, in which
275# case the list of transfers produced will never read from the
276# original image.
277
278class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700279 def __init__(self, tgt, src=None, threads=None, version=4,
280 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281 if threads is None:
282 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700283 if threads == 0:
284 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700285 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700286 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700287 self.transfers = []
288 self.src_basenames = {}
289 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800290 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700291 self.touched_src_ranges = RangeSet()
292 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700293 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700294
Tao Bao8fad03e2017-03-01 14:36:26 -0800295 assert version in (3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700296
297 self.tgt = tgt
298 if src is None:
299 src = EmptyImage()
300 self.src = src
301
302 # The updater code that installs the patch always uses 4k blocks.
303 assert tgt.blocksize == 4096
304 assert src.blocksize == 4096
305
306 # The range sets in each filemap should comprise a partition of
307 # the care map.
308 self.AssertPartition(src.care_map, src.file_map.values())
309 self.AssertPartition(tgt.care_map, tgt.file_map.values())
310
  @property
  def max_stashed_size(self):
    # Peak number of stashed bytes needed while applying the update, as
    # computed by WriteTransfers(); stays 0 until that method has run.
    return self._max_stashed_size
314
  def Compute(self, prefix):
    """Compute the transfer list and write all output files.

    prefix is the path prefix for the output artifacts; WriteTransfers()
    emits prefix + ".transfer.list" and ComputePatches() emits the new/patch
    data files alongside it.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) a exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    self.ReverseBackwardEdges()
    self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
346
  def WriteTransfers(self, prefix):
    """Render self.transfers as a BBOTA v3/v4 transfer list.

    Writes prefix + ".transfer.list": a 4-line header (version, total blocks
    written, 0 for the unused stash-slot count, max stashed blocks) followed
    by one command per line. Also records the peak stash usage in
    self._max_stashed_size and the hash of all source blocks read in
    self.touched_src_sha1.
    """
    def WriteSplitTransfers(out, style, target_blocks):
      """Limit the size of operand in command 'new' and 'zero' to 1024 blocks.

      This prevents the target size of one command from being too large; and
      might help to avoid fsync errors on some devices."""

      assert style == "new" or style == "zero"
      blocks_limit = 1024
      total = 0
      while target_blocks:
        blocks_to_write = target_blocks.first(blocks_limit)
        out.append("%s %s\n" % (style, blocks_to_write.to_string_raw()))
        total += blocks_to_write.size()
        target_blocks = target_blocks.subtract(blocks_to_write)
      return total

    out = []
    total = 0

    # In BBOTA v3+, it uses the hash of the stashed blocks as the stash slot
    # id. 'stashes' records the map from 'hash' to the ref count. The stash
    # will be freed only if the count decrements to zero.
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    for xf in self.transfers:

      # Emit a "stash" command only the first time a given content hash is
      # stashed; later requests just bump the ref count.
      for _, sr in xf.stash_before:
        sh = self.src.RangeSha1(sr)
        if sh in stashes:
          stashes[sh] += 1
        else:
          stashes[sh] = 1
          stashed_blocks += sr.size()
          self.touched_src_ranges = self.touched_src_ranges.union(sr)
          out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      # The source spec (src_str) takes one of these forms:
      #   <# blocks> <src ranges>
      #     OR
      #   <# blocks> <src ranges> <src locs> <stash refs...>
      #     OR
      #   <# blocks> - <stash refs...>

      size = xf.src_ranges.size()
      src_str = [str(size)]

      # Split the source into directly-read ranges and stashed pieces; each
      # stashed piece is referenced by hash plus its location (map_within)
      # inside the command's source window.
      unstashed_src_ranges = xf.src_ranges
      mapped_stashes = []
      for _, sr in xf.use_stash:
        unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
        sh = self.src.RangeSha1(sr)
        sr = xf.src_ranges.map_within(sr)
        mapped_stashes.append(sr)
        assert sh in stashes
        src_str.append("%s:%s" % (sh, sr.to_string_raw()))
        stashes[sh] -= 1
        if stashes[sh] == 0:
          # Last reference dropped; schedule a "free" after this command.
          free_string.append("free %s\n" % (sh,))
          free_size += sr.size()
          stashes.pop(sh)

      if unstashed_src_ranges:
        src_str.insert(1, unstashed_src_ranges.to_string_raw())
        if xf.use_stash:
          mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
          src_str.insert(2, mapped_unstashed.to_string_raw())
          mapped_stashes.append(mapped_unstashed)
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
      else:
        src_str.insert(1, "-")
        self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

      src_str = " ".join(src_str)

      # version 3+:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        assert tgt_size == WriteSplitTransfers(out, xf.style, xf.tgt_ranges)
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op; emit nothing.
        if xf.src_ranges != xf.tgt_ranges:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %s %s %s\n" % (
              xf.style,
              xf.tgt_sha1,
              xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        # take into account automatic stashing of overlapping blocks
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          temp_stash_usage = stashed_blocks + xf.src_ranges.size()
          if temp_stash_usage > max_stashed_blocks:
            max_stashed_blocks = temp_stash_usage

        self.touched_src_ranges = self.touched_src_ranges.union(xf.src_ranges)

        out.append("%s %d %d %s %s %s %s\n" % (
            xf.style,
            xf.patch_start, xf.patch_len,
            xf.src_sha1,
            xf.tgt_sha1,
            xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        assert WriteSplitTransfers(out, xf.style, to_zero) == to_zero.size()
        total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize <= max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    self.touched_src_sha1 = self.src.RangeSha1(self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      assert (WriteSplitTransfers(out, "zero", self.tgt.extended) ==
              self.tgt.extended.size())
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    # v3+: the number of stash slots is unused.
    out.insert(2, "0\n")
    out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
    OPTIONS = common.OPTIONS
    if OPTIONS.cache_size is not None:
      max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
      print("max stashed blocks: %d (%d bytes), "
            "limit: %d bytes (%.2f%%)\n" % (
                max_stashed_blocks, self._max_stashed_size, max_allowed,
                self._max_stashed_size * 100.0 / max_allowed))
    else:
      print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
          max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700553
  def ReviseStashSize(self):
    """Keep the runtime stash usage under the cache-size limit.

    Simulates stash usage over self.transfers against the budget derived
    from cache_size * stash_threshold. Any command whose stash need would
    exceed the budget is converted to a "new" command (losing the diff but
    dropping the stash requirement). Returns the total number of target
    blocks that were converted to "new".
    """
    print("Revising stash size...")
    stash_map = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (raw_id, sr), stash_map[raw_id] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for stash_raw_id, sr in xf.stash_before:
        stash_map[stash_raw_id] = (sr, xf)

      # Record all the stashes command xf uses.
      for stash_raw_id, _ in xf.use_stash:
        stash_map[stash_raw_id] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    # See the comments for 'stashes' in WriteTransfers().
    stashes = {}
    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for stash_raw_id, sr in xf.stash_before:
        # Check the post-command stashed_blocks.
        stashed_blocks_after = stashed_blocks
        sh = self.src.RangeSha1(sr)
        if sh not in stashes:
          # Only a first stash of this content actually consumes space;
          # duplicates share the existing slot.
          stashed_blocks_after += sr.size()

        if stashed_blocks_after > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stash_map[stash_raw_id][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          # Update the stashes map.
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
          stashed_blocks = stashed_blocks_after

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff":
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for stash_raw_id, sr in cmd.use_stash:
          def_cmd = stash_map[stash_raw_id][1]
          assert (stash_raw_id, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((stash_raw_id, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

      # xf.use_stash may generate free commands.
      for _, sr in xf.use_stash:
        sh = self.src.RangeSha1(sr)
        assert sh in stashes
        stashes[sh] -= 1
        if stashes[sh] == 0:
          stashed_blocks -= sr.size()
          stashes.pop(sh)

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
    return new_blocks
Tao Bao9a5caf22015-08-25 15:10:10 -0700645
Doug Zongkerfc44a512014-08-26 13:10:25 -0700646 def ComputePatches(self, prefix):
647 print("Reticulating splines...")
Tao Bao183e56e2017-03-05 17:05:09 -0800648 diff_queue = []
Doug Zongkerfc44a512014-08-26 13:10:25 -0700649 patch_num = 0
650 with open(prefix + ".new.dat", "wb") as new_f:
Tao Bao183e56e2017-03-05 17:05:09 -0800651 for index, xf in enumerate(self.transfers):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700652 if xf.style == "zero":
Tao Bao08c85832016-09-19 22:26:30 -0700653 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
654 print("%10d %10d (%6.2f%%) %7s %s %s" % (
655 tgt_size, tgt_size, 100.0, xf.style, xf.tgt_name,
656 str(xf.tgt_ranges)))
657
Doug Zongkerfc44a512014-08-26 13:10:25 -0700658 elif xf.style == "new":
Tao Bao183e56e2017-03-05 17:05:09 -0800659 self.tgt.WriteRangeDataToFd(xf.tgt_ranges, new_f)
Tao Bao08c85832016-09-19 22:26:30 -0700660 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
661 print("%10d %10d (%6.2f%%) %7s %s %s" % (
662 tgt_size, tgt_size, 100.0, xf.style,
663 xf.tgt_name, str(xf.tgt_ranges)))
664
Doug Zongkerfc44a512014-08-26 13:10:25 -0700665 elif xf.style == "diff":
Doug Zongkerfc44a512014-08-26 13:10:25 -0700666 # We can't compare src and tgt directly because they may have
667 # the same content but be broken up into blocks differently, eg:
668 #
669 # ["he", "llo"] vs ["h", "ello"]
670 #
671 # We want those to compare equal, ideally without having to
672 # actually concatenate the strings (these may be tens of
673 # megabytes).
Tao Bao183e56e2017-03-05 17:05:09 -0800674 if xf.src_sha1 == xf.tgt_sha1:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700675 # These are identical; we don't need to generate a patch,
676 # just issue copy commands on the device.
677 xf.style = "move"
Tao Bao183e56e2017-03-05 17:05:09 -0800678 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
Tao Bao08c85832016-09-19 22:26:30 -0700679 if xf.src_ranges != xf.tgt_ranges:
680 print("%10d %10d (%6.2f%%) %7s %s %s (from %s)" % (
681 tgt_size, tgt_size, 100.0, xf.style,
682 xf.tgt_name if xf.tgt_name == xf.src_name else (
683 xf.tgt_name + " (from " + xf.src_name + ")"),
684 str(xf.tgt_ranges), str(xf.src_ranges)))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700685 else:
686 # For files in zip format (eg, APKs, JARs, etc.) we would
687 # like to use imgdiff -z if possible (because it usually
688 # produces significantly smaller patches than bsdiff).
689 # This is permissible if:
690 #
Tao Bao293fd132016-06-11 12:19:23 -0700691 # - imgdiff is not disabled, and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700692 # - the source and target files are monotonic (ie, the
693 # data is stored with blocks in increasing order), and
694 # - we haven't removed any blocks from the source set.
695 #
696 # If these conditions are satisfied then appending all the
697 # blocks in the set together in order will produce a valid
698 # zip file (plus possibly extra zeros in the last block),
699 # which is what imgdiff needs to operate. (imgdiff is
700 # fine with extra zeros at the end of the file.)
Tao Bao293fd132016-06-11 12:19:23 -0700701 imgdiff = (not self.disable_imgdiff and xf.intact and
Doug Zongkerfc44a512014-08-26 13:10:25 -0700702 xf.tgt_name.split(".")[-1].lower()
703 in ("apk", "jar", "zip"))
704 xf.style = "imgdiff" if imgdiff else "bsdiff"
Tao Bao183e56e2017-03-05 17:05:09 -0800705 diff_queue.append((index, imgdiff, patch_num))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700706 patch_num += 1
707
708 else:
709 assert False, "unknown style " + xf.style
710
Tao Bao183e56e2017-03-05 17:05:09 -0800711 if diff_queue:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700712 if self.threads > 1:
713 print("Computing patches (using %d threads)..." % (self.threads,))
714 else:
715 print("Computing patches...")
Doug Zongkerfc44a512014-08-26 13:10:25 -0700716
Tao Bao183e56e2017-03-05 17:05:09 -0800717 diff_total = len(diff_queue)
718 patches = [None] * diff_total
Tianjie Xub59c17f2016-10-28 17:55:53 -0700719 error_messages = []
Tao Baob937ead2017-10-19 16:51:53 -0700720 warning_messages = []
Tao Bao33635b12017-03-12 13:02:51 -0700721 if sys.stdout.isatty():
722 global diff_done
723 diff_done = 0
Doug Zongkerfc44a512014-08-26 13:10:25 -0700724
Tao Bao183e56e2017-03-05 17:05:09 -0800725 # Using multiprocessing doesn't give additional benefits, due to the
726 # pattern of the code. The diffing work is done by subprocess.call, which
727 # already runs in a separate process (not affected much by the GIL -
728 # Global Interpreter Lock). Using multiprocess also requires either a)
729 # writing the diff input files in the main process before forking, or b)
730 # reopening the image file (SparseImage) in the worker processes. Doing
731 # neither of them further improves the performance.
Doug Zongkerfc44a512014-08-26 13:10:25 -0700732 lock = threading.Lock()
733 def diff_worker():
734 while True:
735 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800736 if not diff_queue:
Dan Albert8b72aef2015-03-23 19:13:21 -0700737 return
Tao Bao183e56e2017-03-05 17:05:09 -0800738 xf_index, imgdiff, patch_index = diff_queue.pop()
739
740 xf = self.transfers[xf_index]
741 src_ranges = xf.src_ranges
742 tgt_ranges = xf.tgt_ranges
743
744 # Needs lock since WriteRangeDataToFd() is stateful (calling seek).
Doug Zongkerfc44a512014-08-26 13:10:25 -0700745 with lock:
Tao Bao183e56e2017-03-05 17:05:09 -0800746 src_file = common.MakeTempFile(prefix="src-")
747 with open(src_file, "wb") as fd:
748 self.src.WriteRangeDataToFd(src_ranges, fd)
749
750 tgt_file = common.MakeTempFile(prefix="tgt-")
751 with open(tgt_file, "wb") as fd:
752 self.tgt.WriteRangeDataToFd(tgt_ranges, fd)
753
Tao Baob937ead2017-10-19 16:51:53 -0700754 message = []
Tao Bao183e56e2017-03-05 17:05:09 -0800755 try:
756 patch = compute_patch(src_file, tgt_file, imgdiff)
757 except ValueError as e:
Tao Baob937ead2017-10-19 16:51:53 -0700758 message.append(
759 "Failed to generate %s for %s: tgt=%s, src=%s:\n%s" % (
760 "imgdiff" if imgdiff else "bsdiff",
761 xf.tgt_name if xf.tgt_name == xf.src_name else
762 xf.tgt_name + " (from " + xf.src_name + ")",
763 xf.tgt_ranges, xf.src_ranges, e.message))
764 # TODO(b/68016761): Better handle the holes in mke2fs created images.
765 if imgdiff:
766 try:
767 patch = compute_patch(src_file, tgt_file, imgdiff=False)
768 message.append(
769 "Fell back and generated with bsdiff instead for %s" % (
770 xf.tgt_name,))
771 with lock:
772 warning_messages.extend(message)
773 del message[:]
774 except ValueError as e:
775 message.append(
776 "Also failed to generate with bsdiff for %s:\n%s" % (
777 xf.tgt_name, e.message))
778
779 if message:
Tianjie Xub59c17f2016-10-28 17:55:53 -0700780 with lock:
Tao Baob937ead2017-10-19 16:51:53 -0700781 error_messages.extend(message)
Tao Bao183e56e2017-03-05 17:05:09 -0800782
783 with lock:
784 patches[patch_index] = (xf_index, patch)
785 if sys.stdout.isatty():
Tao Bao33635b12017-03-12 13:02:51 -0700786 global diff_done
787 diff_done += 1
788 progress = diff_done * 100 / diff_total
Tao Bao183e56e2017-03-05 17:05:09 -0800789 # '\033[K' is to clear to EOL.
790 print(' [%d%%] %s\033[K' % (progress, xf.tgt_name), end='\r')
791 sys.stdout.flush()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700792
793 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700794 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700795 for th in threads:
796 th.start()
797 while threads:
798 threads.pop().join()
Tao Bao183e56e2017-03-05 17:05:09 -0800799
800 if sys.stdout.isatty():
801 print('\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700802
Tao Baob937ead2017-10-19 16:51:53 -0700803 if warning_messages:
804 print('WARNING:')
805 print('\n'.join(warning_messages))
806 print('\n\n\n')
807
Tianjie Xub59c17f2016-10-28 17:55:53 -0700808 if error_messages:
Tao Baob937ead2017-10-19 16:51:53 -0700809 print('ERROR:')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700810 print('\n'.join(error_messages))
Tao Baob937ead2017-10-19 16:51:53 -0700811 print('\n\n\n')
Tianjie Xub59c17f2016-10-28 17:55:53 -0700812 sys.exit(1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700813 else:
814 patches = []
815
Tao Bao183e56e2017-03-05 17:05:09 -0800816 offset = 0
817 with open(prefix + ".patch.dat", "wb") as patch_fd:
818 for index, patch in patches:
819 xf = self.transfers[index]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700820 xf.patch_len = len(patch)
Tao Bao183e56e2017-03-05 17:05:09 -0800821 xf.patch_start = offset
822 offset += xf.patch_len
823 patch_fd.write(patch)
824
825 if common.OPTIONS.verbose:
826 tgt_size = xf.tgt_ranges.size() * self.tgt.blocksize
827 print("%10d %10d (%6.2f%%) %7s %s %s %s" % (
828 xf.patch_len, tgt_size, xf.patch_len * 100.0 / tgt_size,
829 xf.style,
830 xf.tgt_name if xf.tgt_name == xf.src_name else (
831 xf.tgt_name + " (from " + xf.src_name + ")"),
832 xf.tgt_ranges, xf.src_ranges))
Doug Zongkerfc44a512014-08-26 13:10:25 -0700833
834 def AssertSequenceGood(self):
835 # Simulate the sequences of transfers we will output, and check that:
836 # - we never read a block after writing it, and
837 # - we write every block we care about exactly once.
838
839 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800840 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700841
842 # Imagine processing the transfers in order.
843 for xf in self.transfers:
844 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700845
846 x = xf.src_ranges
Tao Bao8fad03e2017-03-01 14:36:26 -0800847 for _, sr in xf.use_stash:
848 x = x.subtract(sr)
Doug Zongker62338182014-09-08 08:29:55 -0700849
Doug Zongker6ab2a502016-02-09 08:28:09 -0800850 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800851 # Source image could be larger. Don't check the blocks that are in the
852 # source image only. Since they are not in 'touched', and won't ever
853 # be touched.
854 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800855 assert touched[i] == 0
856
857 # Check that the output blocks for this transfer haven't yet
858 # been touched, and touch all the blocks written by this
859 # transfer.
860 for s, e in xf.tgt_ranges:
861 for i in range(s, e):
862 assert touched[i] == 0
863 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700864
865 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800866 for s, e in self.tgt.care_map:
867 for i in range(s, e):
868 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700869
Doug Zongker62338182014-09-08 08:29:55 -0700870 def ImproveVertexSequence(self):
871 print("Improving vertex order...")
872
873 # At this point our digraph is acyclic; we reversed any edges that
874 # were backwards in the heuristically-generated sequence. The
875 # previously-generated order is still acceptable, but we hope to
876 # find a better order that needs less memory for stashed data.
877 # Now we do a topological sort to generate a new vertex order,
878 # using a greedy algorithm to choose which vertex goes next
879 # whenever we have a choice.
880
881 # Make a copy of the edge set; this copy will get destroyed by the
882 # algorithm.
883 for xf in self.transfers:
884 xf.incoming = xf.goes_after.copy()
885 xf.outgoing = xf.goes_before.copy()
886
887 L = [] # the new vertex order
888
889 # S is the set of sources in the remaining graph; we always choose
890 # the one that leaves the least amount of stashed data after it's
891 # executed.
892 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
893 if not u.incoming]
894 heapq.heapify(S)
895
896 while S:
897 _, _, xf = heapq.heappop(S)
898 L.append(xf)
899 for u in xf.outgoing:
900 del u.incoming[xf]
901 if not u.incoming:
902 heapq.heappush(S, (u.NetStashChange(), u.order, u))
903
904 # if this fails then our graph had a cycle.
905 assert len(L) == len(self.transfers)
906
907 self.transfers = L
908 for i, xf in enumerate(L):
909 xf.order = i
910
Doug Zongkerfc44a512014-08-26 13:10:25 -0700911 def RemoveBackwardEdges(self):
912 print("Removing backward edges...")
913 in_order = 0
914 out_of_order = 0
915 lost_source = 0
916
917 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700918 lost = 0
919 size = xf.src_ranges.size()
920 for u in xf.goes_before:
921 # xf should go before u
922 if xf.order < u.order:
923 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700924 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700925 else:
926 # it doesn't, boo. trim the blocks that u writes from xf's
927 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700928 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700929 assert xf.src_ranges.overlaps(u.tgt_ranges)
930 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
931 xf.intact = False
932
933 if xf.style == "diff" and not xf.src_ranges:
934 # nothing left to diff from; treat as new data
935 xf.style = "new"
936
937 lost = size - xf.src_ranges.size()
938 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700939
940 print((" %d/%d dependencies (%.2f%%) were violated; "
941 "%d source blocks removed.") %
942 (out_of_order, in_order + out_of_order,
943 (out_of_order * 100.0 / (in_order + out_of_order))
944 if (in_order + out_of_order) else 0.0,
945 lost_source))
946
Doug Zongker62338182014-09-08 08:29:55 -0700947 def ReverseBackwardEdges(self):
Tao Bao3a2e3502016-12-28 09:14:39 -0800948 """Reverse unsatisfying edges and compute pairs of stashed blocks.
949
950 For each transfer, make sure it properly stashes the blocks it touches and
951 will be used by later transfers. It uses pairs of (stash_raw_id, range) to
952 record the blocks to be stashed. 'stash_raw_id' is an id that uniquely
953 identifies each pair. Note that for the same range (e.g. RangeSet("1-5")),
954 it is possible to have multiple pairs with different 'stash_raw_id's. Each
955 'stash_raw_id' will be consumed by one transfer. In BBOTA v3+, identical
956 blocks will be written to the same stash slot in WriteTransfers().
957 """
958
Doug Zongker62338182014-09-08 08:29:55 -0700959 print("Reversing backward edges...")
960 in_order = 0
961 out_of_order = 0
Tao Bao3a2e3502016-12-28 09:14:39 -0800962 stash_raw_id = 0
Doug Zongker62338182014-09-08 08:29:55 -0700963 stash_size = 0
964
965 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700966 for u in xf.goes_before.copy():
967 # xf should go before u
968 if xf.order < u.order:
969 # it does, hurray!
970 in_order += 1
971 else:
972 # it doesn't, boo. modify u to stash the blocks that it
973 # writes that xf wants to read, and then require u to go
974 # before xf.
975 out_of_order += 1
976
977 overlap = xf.src_ranges.intersect(u.tgt_ranges)
978 assert overlap
979
Tao Bao3a2e3502016-12-28 09:14:39 -0800980 u.stash_before.append((stash_raw_id, overlap))
981 xf.use_stash.append((stash_raw_id, overlap))
982 stash_raw_id += 1
Doug Zongker62338182014-09-08 08:29:55 -0700983 stash_size += overlap.size()
984
985 # reverse the edge direction; now xf must go after u
986 del xf.goes_before[u]
987 del u.goes_after[xf]
988 xf.goes_after[u] = None # value doesn't matter
989 u.goes_before[xf] = None
990
991 print((" %d/%d dependencies (%.2f%%) were violated; "
992 "%d source blocks stashed.") %
993 (out_of_order, in_order + out_of_order,
994 (out_of_order * 100.0 / (in_order + out_of_order))
995 if (in_order + out_of_order) else 0.0,
996 stash_size))
997
Doug Zongkerfc44a512014-08-26 13:10:25 -0700998 def FindVertexSequence(self):
999 print("Finding vertex sequence...")
1000
1001 # This is based on "A Fast & Effective Heuristic for the Feedback
1002 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
1003 # it as starting with the digraph G and moving all the vertices to
1004 # be on a horizontal line in some order, trying to minimize the
1005 # number of edges that end up pointing to the left. Left-pointing
1006 # edges will get removed to turn the digraph into a DAG. In this
1007 # case each edge has a weight which is the number of source blocks
1008 # we'll lose if that edge is removed; we try to minimize the total
1009 # weight rather than just the number of edges.
1010
1011 # Make a copy of the edge set; this copy will get destroyed by the
1012 # algorithm.
1013 for xf in self.transfers:
1014 xf.incoming = xf.goes_after.copy()
1015 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001016 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -07001017
1018 # We use an OrderedDict instead of just a set so that the output
1019 # is repeatable; otherwise it would depend on the hash values of
1020 # the transfer objects.
1021 G = OrderedDict()
1022 for xf in self.transfers:
1023 G[xf] = None
1024 s1 = deque() # the left side of the sequence, built from left to right
1025 s2 = deque() # the right side of the sequence, built from right to left
1026
Doug Zongker6ab2a502016-02-09 08:28:09 -08001027 heap = []
1028 for xf in self.transfers:
1029 xf.heap_item = HeapItem(xf)
1030 heap.append(xf.heap_item)
1031 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001032
Tao Bao33482282016-10-24 16:49:08 -07001033 # Use OrderedDict() instead of set() to preserve the insertion order. Need
1034 # to use 'sinks[key] = None' to add key into the set. sinks will look like
1035 # { key1: None, key2: None, ... }.
1036 sinks = OrderedDict.fromkeys(u for u in G if not u.outgoing)
1037 sources = OrderedDict.fromkeys(u for u in G if not u.incoming)
Doug Zongker6ab2a502016-02-09 08:28:09 -08001038
1039 def adjust_score(iu, delta):
1040 iu.score += delta
1041 iu.heap_item.clear()
1042 iu.heap_item = HeapItem(iu)
1043 heapq.heappush(heap, iu.heap_item)
1044
1045 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -07001046 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001047 while sinks:
Tao Bao33482282016-10-24 16:49:08 -07001048 new_sinks = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001049 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001050 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001051 s2.appendleft(u)
1052 del G[u]
1053 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001054 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001055 if not iu.outgoing:
1056 new_sinks[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001057 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -07001058
1059 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001060 while sources:
Tao Bao33482282016-10-24 16:49:08 -07001061 new_sources = OrderedDict()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001062 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001063 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001064 s1.append(u)
1065 del G[u]
1066 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001067 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001068 if not iu.incoming:
1069 new_sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001070 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001071
Doug Zongker6ab2a502016-02-09 08:28:09 -08001072 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001073
1074 # Find the "best" vertex to put next. "Best" is the one that
1075 # maximizes the net difference in source blocks saved we get by
1076 # pretending it's a source rather than a sink.
1077
Doug Zongker6ab2a502016-02-09 08:28:09 -08001078 while True:
1079 u = heapq.heappop(heap)
1080 if u and u.item in G:
1081 u = u.item
1082 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001083
Doug Zongkerfc44a512014-08-26 13:10:25 -07001084 s1.append(u)
1085 del G[u]
1086 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001087 adjust_score(iu, +iu.incoming.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001088 if not iu.incoming:
1089 sources[iu] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001090
Doug Zongkerfc44a512014-08-26 13:10:25 -07001091 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001092 adjust_score(iu, -iu.outgoing.pop(u))
Tao Bao33482282016-10-24 16:49:08 -07001093 if not iu.outgoing:
1094 sinks[iu] = None
Doug Zongkerfc44a512014-08-26 13:10:25 -07001095
1096 # Now record the sequence in the 'order' field of each transfer,
1097 # and by rearranging self.transfers to be in the chosen sequence.
1098
1099 new_transfers = []
1100 for x in itertools.chain(s1, s2):
1101 x.order = len(new_transfers)
1102 new_transfers.append(x)
1103 del x.incoming
1104 del x.outgoing
1105
1106 self.transfers = new_transfers
1107
1108 def GenerateDigraph(self):
1109 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001110
1111 # Each item of source_ranges will be:
1112 # - None, if that block is not used as a source,
Tao Bao33482282016-10-24 16:49:08 -07001113 # - an ordered set of transfers.
Doug Zongker6ab2a502016-02-09 08:28:09 -08001114 source_ranges = []
1115 for b in self.transfers:
1116 for s, e in b.src_ranges:
1117 if e > len(source_ranges):
1118 source_ranges.extend([None] * (e-len(source_ranges)))
1119 for i in range(s, e):
1120 if source_ranges[i] is None:
Tao Bao33482282016-10-24 16:49:08 -07001121 source_ranges[i] = OrderedDict.fromkeys([b])
Doug Zongker6ab2a502016-02-09 08:28:09 -08001122 else:
Tao Bao33482282016-10-24 16:49:08 -07001123 source_ranges[i][b] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001124
Doug Zongkerfc44a512014-08-26 13:10:25 -07001125 for a in self.transfers:
Tao Bao33482282016-10-24 16:49:08 -07001126 intersections = OrderedDict()
Doug Zongker6ab2a502016-02-09 08:28:09 -08001127 for s, e in a.tgt_ranges:
1128 for i in range(s, e):
1129 if i >= len(source_ranges): break
Tao Bao33482282016-10-24 16:49:08 -07001130 # Add all the Transfers in source_ranges[i] to the (ordered) set.
1131 if source_ranges[i] is not None:
1132 for j in source_ranges[i]:
1133 intersections[j] = None
Doug Zongker6ab2a502016-02-09 08:28:09 -08001134
1135 for b in intersections:
1136 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001137
1138 # If the blocks written by A are read by B, then B needs to go before A.
1139 i = a.tgt_ranges.intersect(b.src_ranges)
1140 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001141 if b.src_name == "__ZERO":
1142 # the cost of removing source blocks for the __ZERO domain
1143 # is (nearly) zero.
1144 size = 0
1145 else:
1146 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001147 b.goes_before[a] = size
1148 a.goes_after[b] = size
1149
  def FindTransfers(self):
    """Parse the file_map to generate all the transfers.

    Matches each target file to a source file by exact path, then exact
    basename, then digit-collapsed basename, adding "diff" transfers for
    matches and "new" transfers otherwise. The special __ZERO and __COPY
    domains become "zero" and "new" transfers respectively. Large files
    are split into multiple transfers so their source blocks fit in the
    stash (see AddSplitTransfers).
    """

    def AddSplitTransfers(tgt_name, src_name, tgt_ranges, src_ranges,
                          style, by_id):
      """Add one or multiple Transfer()s by splitting large files.

      For BBOTA v3, we need to stash source blocks for resumable feature.
      However, with the growth of file size and the shrink of the cache
      partition source blocks are too large to be stashed. If a file occupies
      too many blocks, we split it into smaller pieces by getting multiple
      Transfer()s.

      The downside is that after splitting, we may increase the package size
      since the split pieces don't align well. According to our experiments,
      1/8 of the cache size as the per-piece limit appears to be optimal.
      Compared to the fixed 1024-block limit, it reduces the overall package
      size by 30% for volantis, and 20% for angler and bullhead."""

      # Possibly split large files into smaller chunks.
      pieces = 0
      cache_size = common.OPTIONS.cache_size
      split_threshold = 0.125
      max_blocks_per_transfer = int(cache_size * split_threshold /
                                    self.tgt.blocksize)

      # Change nothing for small files.
      if (tgt_ranges.size() <= max_blocks_per_transfer and
          src_ranges.size() <= max_blocks_per_transfer):
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Peel off fixed-size pieces while both sides are still too big;
      # each piece gets a "-<n>" suffix on both names.
      while (tgt_ranges.size() > max_blocks_per_transfer and
             src_ranges.size() > max_blocks_per_transfer):
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        tgt_first = tgt_ranges.first(max_blocks_per_transfer)
        src_first = src_ranges.first(max_blocks_per_transfer)

        Transfer(tgt_split_name, src_split_name, tgt_first, src_first,
                 self.tgt.RangeSha1(tgt_first), self.src.RangeSha1(src_first),
                 style, by_id)

        tgt_ranges = tgt_ranges.subtract(tgt_first)
        src_ranges = src_ranges.subtract(src_first)
        pieces += 1

      # Handle remaining blocks.
      if tgt_ranges.size() or src_ranges.size():
        # Must be both non-empty.
        assert tgt_ranges.size() and src_ranges.size()
        tgt_split_name = "%s-%d" % (tgt_name, pieces)
        src_split_name = "%s-%d" % (src_name, pieces)
        Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)

    def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
                    split=False):
      """Wrapper function for adding a Transfer()."""

      # We specialize diff transfers only (which covers bsdiff/imgdiff/move);
      # otherwise add the Transfer() as is.
      if style != "diff" or not split:
        Transfer(tgt_name, src_name, tgt_ranges, src_ranges,
                 self.tgt.RangeSha1(tgt_ranges), self.src.RangeSha1(src_ranges),
                 style, by_id)
        return

      # Handle .odex files specially to analyze the block-wise difference. If
      # most of the blocks are identical with only few changes (e.g. header),
      # we will patch the changed blocks only. This avoids stashing unchanged
      # blocks while patching. We limit the analysis to files without size
      # changes only. This is to avoid sacrificing the OTA generation cost too
      # much.
      if (tgt_name.split(".")[-1].lower() == 'odex' and
          tgt_ranges.size() == src_ranges.size()):

        # 0.5 threshold can be further tuned. The tradeoff is: if only very
        # few blocks remain identical, we lose the opportunity to use imgdiff
        # that may have better compression ratio than bsdiff.
        crop_threshold = 0.5

        tgt_skipped = RangeSet()
        src_skipped = RangeSet()
        tgt_size = tgt_ranges.size()
        tgt_changed = 0
        # Compare block by block, accumulating the identical blocks into
        # the *_skipped sets.
        for src_block, tgt_block in zip(src_ranges.next_item(),
                                        tgt_ranges.next_item()):
          src_rs = RangeSet(str(src_block))
          tgt_rs = RangeSet(str(tgt_block))
          if self.src.ReadRangeSet(src_rs) == self.tgt.ReadRangeSet(tgt_rs):
            tgt_skipped = tgt_skipped.union(tgt_rs)
            src_skipped = src_skipped.union(src_rs)
          else:
            tgt_changed += tgt_rs.size()

          # Terminate early if no clear sign of benefits.
          if tgt_changed > tgt_size * crop_threshold:
            break

        if tgt_changed < tgt_size * crop_threshold:
          assert tgt_changed + tgt_skipped.size() == tgt_size
          print('%10d %10d (%6.2f%%) %s' % (tgt_skipped.size(), tgt_size,
                tgt_skipped.size() * 100.0 / tgt_size, tgt_name))
          AddSplitTransfers(
              "%s-skipped" % (tgt_name,),
              "%s-skipped" % (src_name,),
              tgt_skipped, src_skipped, style, by_id)

          # Intentionally change the file extension to avoid being imgdiff'd as
          # the files are no longer in their original format.
          tgt_name = "%s-cropped" % (tgt_name,)
          src_name = "%s-cropped" % (src_name,)
          tgt_ranges = tgt_ranges.subtract(tgt_skipped)
          src_ranges = src_ranges.subtract(src_skipped)

          # Possibly having no changed blocks.
          if not tgt_ranges:
            return

      # Add the transfer(s).
      AddSplitTransfers(
          tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)

    print("Finding transfers...")

    empty = RangeSet()
    for tgt_fn, tgt_ranges in self.tgt.file_map.items():
      if tgt_fn == "__ZERO":
        # the special "__ZERO" domain is all the blocks not contained
        # in any file and that are filled with zeros. We have a
        # special transfer style for zero blocks.
        src_ranges = self.src.file_map.get("__ZERO", empty)
        AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
                    "zero", self.transfers)
        continue

      elif tgt_fn == "__COPY":
        # "__COPY" domain includes all the blocks not contained in any
        # file and that need to be copied unconditionally to the target.
        AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
        continue

      elif tgt_fn in self.src.file_map:
        # Look for an exact pathname match in the source.
        AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
                    "diff", self.transfers, True)
        continue

      b = os.path.basename(tgt_fn)
      if b in self.src_basenames:
        # Look for an exact basename match in the source.
        src_fn = self.src_basenames[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      b = re.sub("[0-9]+", "#", b)
      if b in self.src_numpatterns:
        # Look for a 'number pattern' match (a basename match after
        # all runs of digits are replaced by "#"). (This is useful
        # for .so files that contain version numbers in the filename
        # that get bumped.)
        src_fn = self.src_numpatterns[b]
        AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
                    "diff", self.transfers, True)
        continue

      # No match anywhere in the source: ship the file as new data.
      AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001322
1323 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001324 for k in self.src.file_map.keys():
1325 b = os.path.basename(k)
1326 self.src_basenames[b] = k
1327 b = re.sub("[0-9]+", "#", b)
1328 self.src_numpatterns[b] = k
1329
1330 @staticmethod
1331 def AssertPartition(total, seq):
1332 """Assert that all the RangeSets in 'seq' form a partition of the
1333 'total' RangeSet (ie, they are nonintersecting and their union
1334 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001335
Doug Zongkerfc44a512014-08-26 13:10:25 -07001336 so_far = RangeSet()
1337 for i in seq:
1338 assert not so_far.overlaps(i)
1339 so_far = so_far.union(i)
1340 assert so_far == total