blob: eee7e8d992c6dde72aa00c8494907a1c3158f4b6 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import array
Tao Bao8dcf7382015-05-21 14:09:49 -070020import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080021import functools
Doug Zongker62338182014-09-08 08:29:55 -070022import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import itertools
24import multiprocessing
25import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongker6ab2a502016-02-09 08:28:09 -080029import time
Doug Zongkerfc44a512014-08-26 13:10:25 -070030import tempfile
31
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
# Public API of this module; Image, Transfer and HeapItem are
# implementation details and deliberately not exported.
__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms the src data into the tgt data.

  Args:
    src: iterable of byte strings holding the source data.
    tgt: iterable of byte strings holding the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-format data such as
        APKs/JARs); otherwise run "bsdiff".

  Returns:
    The raw patch contents as a single byte string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    # The diff tools expect to create the patch file themselves.
    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Discard imgdiff's stdout/stderr chatter; only the exit status and
      # the patch file matter. Use os.devnull (portable) instead of the
      # hard-coded "/dev/null", and close the handle instead of leaking it.
      with open(os.devnull, "a") as devnull:
        p = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                            stdout=devnull,
                            stderr=subprocess.STDOUT)
    else:
      p = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if p:
      raise ValueError("diff failed: " + str(p))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Best-effort cleanup of the temporary files.
    try:
      os.unlink(srcfile)
      os.unlink(tgtfile)
      os.unlink(patchfile)
    except OSError:
      pass
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class Image(object):
  """Abstract interface implemented by EmptyImage and DataImage."""

  def ReadRangeSet(self, ranges):
    """Return the data stored in the given block ranges; must be overridden."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image's data; must be overridden."""
    raise NotImplementedError
84
class EmptyImage(Image):
  """An image that contains no blocks at all."""

  # Every attribute describes a zero-block partition.
  blocksize = 4096
  total_blocks = 0
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is never any data to read."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of zero bytes of data.

    clobbered_blocks is always empty here, so the
    include_clobbered_blocks flag cannot change the result.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
100
101
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap *data* as an image.

    Args:
      data: the raw image contents (a byte string).
      trim: if True, drop a trailing partial block.
      pad: if True, zero-fill a trailing partial block (the padded last
          block is then recorded in clobbered_blocks; see Bug 23828506).

    Raises:
      ValueError: if the data is not a multiple of the block size and
          neither trim nor pad was requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks stays an int on Python 3 as well;
    # "/" would produce a float there, which breaks range() below.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # NOTE(review): this stores a plain list while EmptyImage uses a
    # RangeSet — confirm that care_map.subtract() below accepts it.
    self.clobbered_blocks = clobbered_blocks
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # Classify each block (excluding a padded last block) as zero/nonzero.
    # The lists are built as flat (start, end) pairs for RangeSet(data=...).
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return one data chunk per (start, end) block range in *ranges*."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image contents.

    Clobbered blocks are excluded by default; pass
    include_clobbered_blocks=True to hash the raw data as-is.
    """
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # Bug fix: feed the chunks into one hash context. The original code
      # passed the *list* returned by ReadRangeSet() directly to sha1(),
      # which raises TypeError (sha1() requires a byte string, not a list).
      ctx = sha1()
      for chunk in self.ReadRangeSet(ranges):
        ctx.update(chunk)
      return ctx.hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700172
Doug Zongkerfc44a512014-08-26 13:10:25 -0700173
class Transfer(object):
  """One command in the transfer list: produce tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # Both range sets must be monotonic for the transfer to be "intact",
    # i.e. concatenating the blocks in order reproduces the original data.
    self.intact = (getattr(tgt_ranges, "monotonic", False) and
                   getattr(src_ranges, "monotonic", False))

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer in the caller-supplied id registry.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Return blocks this command stashes minus blocks it releases."""
    stored = sum(sr.size() for _, sr in self.stash_before)
    released = sum(sr.size() for _, sr in self.use_stash)
    return stored - released

  def ConvertToNew(self):
    """Rewrite this command as a plain "new" that reads no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
208
209
@functools.total_ordering
class HeapItem(object):
  """Wrapper that orders items by descending score inside heapq.

  heapq implements a min-heap, so the score is stored negated: popping the
  smallest wrapper yields the highest-scoring item. Entries are removed
  lazily — clear() marks them deleted, and consumers skip falsy wrappers.
  """

  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    """Mark this heap entry as deleted (lazy removal)."""
    self.item = None

  def __bool__(self):
    # Bug fix: a wrapper must be truthy while it still holds an item and
    # falsy after clear(); the original test was inverted ("is None"),
    # which would make consumers pick cleared entries and skip live ones.
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing, while Python 3 uses
  # __bool__; alias it so the fix applies on both.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
225
226
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# BlockImageDiff works on two image objects. An image object is
228# anything that provides the following attributes:
229#
230# blocksize: the size in bytes of a block, currently must be 4096.
231#
232# total_blocks: the total size of the partition/image, in blocks.
233#
234# care_map: a RangeSet containing which blocks (in the range [0,
235# total_blocks) we actually care about; i.e. which blocks contain
236# data.
237#
238# file_map: a dict that partitions the blocks contained in care_map
239# into smaller domains that are useful for doing diffs on.
240# (Typically a domain is a file, and the key in file_map is the
241# pathname.)
242#
Tao Baoff777812015-05-12 11:42:31 -0700243# clobbered_blocks: a RangeSet containing which blocks contain data
244# but may be altered by the FS. They need to be excluded when
245# verifying the partition integrity.
246#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247# ReadRangeSet(): a function that takes a RangeSet and returns the
248# data contained in the image blocks of that RangeSet. The data
249# is returned as a list or tuple of strings; concatenating the
250# elements together should produce the requested data.
251# Implementations are free to break up the data into list/tuple
252# elements in any way that is convenient.
253#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700254# TotalSha1(): a function that returns (as a hex string) the SHA-1
255# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700256# care_map minus clobbered_blocks, or including the clobbered
257# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# When creating a BlockImageDiff, the src image may be None, in which
260# case the list of transfers produced will never read from the
261# original image.
262
263class BlockImageDiff(object):
Tao Baoeba409c2015-10-21 13:30:43 -0700264 def __init__(self, tgt, src=None, threads=None, version=4):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700265 if threads is None:
266 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700267 if threads == 0:
268 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700269 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700270 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700271 self.transfers = []
272 self.src_basenames = {}
273 self.src_numpatterns = {}
Doug Zongker62338182014-09-08 08:29:55 -0700274
Tao Baoeba409c2015-10-21 13:30:43 -0700275 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700276
277 self.tgt = tgt
278 if src is None:
279 src = EmptyImage()
280 self.src = src
281
282 # The updater code that installs the patch always uses 4k blocks.
283 assert tgt.blocksize == 4096
284 assert src.blocksize == 4096
285
286 # The range sets in each filemap should comprise a partition of
287 # the care map.
288 self.AssertPartition(src.care_map, src.file_map.values())
289 self.AssertPartition(tgt.care_map, tgt.file_map.values())
290
  def Compute(self, prefix):
    """Run the whole diff pipeline and write the output files.

    Produces prefix+".new.dat" and prefix+".patch.dat" (ComputePatches)
    and prefix+".transfer.list" (WriteTransfers). The steps below must
    run in exactly this order.
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) an exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
325
Dan Albert8b72aef2015-03-23 19:13:21 -0700326 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000327 data = source.ReadRangeSet(ranges)
328 ctx = sha1()
329
330 for p in data:
331 ctx.update(p)
332
333 return ctx.hexdigest()
334
  def WriteTransfers(self, prefix):
    """Write the transfer list for this diff to <prefix>.transfer.list.

    The file begins with the format version and the total count of target
    blocks written; for version >= 2 the stash-slot count and the maximum
    stash size (in blocks) follow. After the header comes one command per
    line.
    """
    out = []

    # Total number of target blocks written by the commands emitted so far.
    total = 0

    # For version 2 this maps a stash index to its slot id. For version
    # >= 3 it additionally holds reference counts keyed by the stashed
    # blocks' SHA-1 (both kinds of key coexist in the one dict).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # Freed slot ids are recycled through a min-heap so the smallest free
    # id is always reused first.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        # Stashing only exists from format version 2 on.
        assert not xf.stash_before
        assert not xf.use_stash

      # Emit "stash" commands for everything this transfer must save
      # before it executes.
      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        if self.version == 2:
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          # Version >= 3 addresses stashes by content hash, so identical
          # contents share one stash via reference counting.
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        # <# blocks> <src ranges>
        #    OR
        # <# blocks> <src ranges> <src locs> <stash refs...>
        #    OR
        # <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Express the stashed ranges relative to the full source span.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              # Last reference: release the stash right away.
              free_size += sr.size()
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target needs no command and
        # contributes nothing to the written-block total.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        # Only blocks not already zero in the source need zeroing.
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image and b) will not be touched by dm-verity.
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)
    if new_dontcare:
      out.append("erase %s\n" % (new_dontcare.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    # NOTE(review): the file is opened in binary mode while "out" holds str
    # objects — fine on Python 2, which this file targets.
    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, max_stashed_size, max_allowed,
                  max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700572
  def ReviseStashSize(self):
    """Keep runtime stash usage within the /cache budget.

    Walks the transfers in execution order tracking how many blocks would
    be stashed at each point; any command whose stash demand would exceed
    cache_size * stash_threshold (in blocks) is converted to a plain
    "new" command, which needs no stash at all.
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700645
Doug Zongkerfc44a512014-08-26 13:10:25 -0700646 def ComputePatches(self, prefix):
647 print("Reticulating splines...")
648 diff_q = []
649 patch_num = 0
650 with open(prefix + ".new.dat", "wb") as new_f:
651 for xf in self.transfers:
652 if xf.style == "zero":
653 pass
654 elif xf.style == "new":
655 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
656 new_f.write(piece)
657 elif xf.style == "diff":
658 src = self.src.ReadRangeSet(xf.src_ranges)
659 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
660
661 # We can't compare src and tgt directly because they may have
662 # the same content but be broken up into blocks differently, eg:
663 #
664 # ["he", "llo"] vs ["h", "ello"]
665 #
666 # We want those to compare equal, ideally without having to
667 # actually concatenate the strings (these may be tens of
668 # megabytes).
669
670 src_sha1 = sha1()
671 for p in src:
672 src_sha1.update(p)
673 tgt_sha1 = sha1()
674 tgt_size = 0
675 for p in tgt:
676 tgt_sha1.update(p)
677 tgt_size += len(p)
678
679 if src_sha1.digest() == tgt_sha1.digest():
680 # These are identical; we don't need to generate a patch,
681 # just issue copy commands on the device.
682 xf.style = "move"
683 else:
684 # For files in zip format (eg, APKs, JARs, etc.) we would
685 # like to use imgdiff -z if possible (because it usually
686 # produces significantly smaller patches than bsdiff).
687 # This is permissible if:
688 #
689 # - the source and target files are monotonic (ie, the
690 # data is stored with blocks in increasing order), and
691 # - we haven't removed any blocks from the source set.
692 #
693 # If these conditions are satisfied then appending all the
694 # blocks in the set together in order will produce a valid
695 # zip file (plus possibly extra zeros in the last block),
696 # which is what imgdiff needs to operate. (imgdiff is
697 # fine with extra zeros at the end of the file.)
698 imgdiff = (xf.intact and
699 xf.tgt_name.split(".")[-1].lower()
700 in ("apk", "jar", "zip"))
701 xf.style = "imgdiff" if imgdiff else "bsdiff"
702 diff_q.append((tgt_size, src, tgt, xf, patch_num))
703 patch_num += 1
704
705 else:
706 assert False, "unknown style " + xf.style
707
708 if diff_q:
709 if self.threads > 1:
710 print("Computing patches (using %d threads)..." % (self.threads,))
711 else:
712 print("Computing patches...")
713 diff_q.sort()
714
715 patches = [None] * patch_num
716
Dan Albert8b72aef2015-03-23 19:13:21 -0700717 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700718 lock = threading.Lock()
719 def diff_worker():
720 while True:
721 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700722 if not diff_q:
723 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700724 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
725 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
726 size = len(patch)
727 with lock:
728 patches[patchnum] = (patch, xf)
729 print("%10d %10d (%6.2f%%) %7s %s" % (
730 size, tgt_size, size * 100.0 / tgt_size, xf.style,
731 xf.tgt_name if xf.tgt_name == xf.src_name else (
732 xf.tgt_name + " (from " + xf.src_name + ")")))
733
734 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700735 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700736 for th in threads:
737 th.start()
738 while threads:
739 threads.pop().join()
740 else:
741 patches = []
742
743 p = 0
744 with open(prefix + ".patch.dat", "wb") as patch_f:
745 for patch, xf in patches:
746 xf.patch_start = p
747 xf.patch_len = len(patch)
748 patch_f.write(patch)
749 p += len(patch)
750
751 def AssertSequenceGood(self):
752 # Simulate the sequences of transfers we will output, and check that:
753 # - we never read a block after writing it, and
754 # - we write every block we care about exactly once.
755
756 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800757 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700758
759 # Imagine processing the transfers in order.
760 for xf in self.transfers:
761 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700762
763 x = xf.src_ranges
764 if self.version >= 2:
765 for _, sr in xf.use_stash:
766 x = x.subtract(sr)
767
Doug Zongker6ab2a502016-02-09 08:28:09 -0800768 for s, e in x:
769 for i in range(s, e):
770 assert touched[i] == 0
771
772 # Check that the output blocks for this transfer haven't yet
773 # been touched, and touch all the blocks written by this
774 # transfer.
775 for s, e in xf.tgt_ranges:
776 for i in range(s, e):
777 assert touched[i] == 0
778 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700779
780 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800781 for s, e in self.tgt.care_map:
782 for i in range(s, e):
783 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700784
Doug Zongker62338182014-09-08 08:29:55 -0700785 def ImproveVertexSequence(self):
786 print("Improving vertex order...")
787
788 # At this point our digraph is acyclic; we reversed any edges that
789 # were backwards in the heuristically-generated sequence. The
790 # previously-generated order is still acceptable, but we hope to
791 # find a better order that needs less memory for stashed data.
792 # Now we do a topological sort to generate a new vertex order,
793 # using a greedy algorithm to choose which vertex goes next
794 # whenever we have a choice.
795
796 # Make a copy of the edge set; this copy will get destroyed by the
797 # algorithm.
798 for xf in self.transfers:
799 xf.incoming = xf.goes_after.copy()
800 xf.outgoing = xf.goes_before.copy()
801
802 L = [] # the new vertex order
803
804 # S is the set of sources in the remaining graph; we always choose
805 # the one that leaves the least amount of stashed data after it's
806 # executed.
807 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
808 if not u.incoming]
809 heapq.heapify(S)
810
811 while S:
812 _, _, xf = heapq.heappop(S)
813 L.append(xf)
814 for u in xf.outgoing:
815 del u.incoming[xf]
816 if not u.incoming:
817 heapq.heappush(S, (u.NetStashChange(), u.order, u))
818
819 # if this fails then our graph had a cycle.
820 assert len(L) == len(self.transfers)
821
822 self.transfers = L
823 for i, xf in enumerate(L):
824 xf.order = i
825
Doug Zongkerfc44a512014-08-26 13:10:25 -0700826 def RemoveBackwardEdges(self):
827 print("Removing backward edges...")
828 in_order = 0
829 out_of_order = 0
830 lost_source = 0
831
832 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700833 lost = 0
834 size = xf.src_ranges.size()
835 for u in xf.goes_before:
836 # xf should go before u
837 if xf.order < u.order:
838 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700839 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700840 else:
841 # it doesn't, boo. trim the blocks that u writes from xf's
842 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700843 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700844 assert xf.src_ranges.overlaps(u.tgt_ranges)
845 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
846 xf.intact = False
847
848 if xf.style == "diff" and not xf.src_ranges:
849 # nothing left to diff from; treat as new data
850 xf.style = "new"
851
852 lost = size - xf.src_ranges.size()
853 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700854
855 print((" %d/%d dependencies (%.2f%%) were violated; "
856 "%d source blocks removed.") %
857 (out_of_order, in_order + out_of_order,
858 (out_of_order * 100.0 / (in_order + out_of_order))
859 if (in_order + out_of_order) else 0.0,
860 lost_source))
861
Doug Zongker62338182014-09-08 08:29:55 -0700862 def ReverseBackwardEdges(self):
863 print("Reversing backward edges...")
864 in_order = 0
865 out_of_order = 0
866 stashes = 0
867 stash_size = 0
868
869 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700870 for u in xf.goes_before.copy():
871 # xf should go before u
872 if xf.order < u.order:
873 # it does, hurray!
874 in_order += 1
875 else:
876 # it doesn't, boo. modify u to stash the blocks that it
877 # writes that xf wants to read, and then require u to go
878 # before xf.
879 out_of_order += 1
880
881 overlap = xf.src_ranges.intersect(u.tgt_ranges)
882 assert overlap
883
884 u.stash_before.append((stashes, overlap))
885 xf.use_stash.append((stashes, overlap))
886 stashes += 1
887 stash_size += overlap.size()
888
889 # reverse the edge direction; now xf must go after u
890 del xf.goes_before[u]
891 del u.goes_after[xf]
892 xf.goes_after[u] = None # value doesn't matter
893 u.goes_before[xf] = None
894
895 print((" %d/%d dependencies (%.2f%%) were violated; "
896 "%d source blocks stashed.") %
897 (out_of_order, in_order + out_of_order,
898 (out_of_order * 100.0 / (in_order + out_of_order))
899 if (in_order + out_of_order) else 0.0,
900 stash_size))
901
Doug Zongkerfc44a512014-08-26 13:10:25 -0700902 def FindVertexSequence(self):
903 print("Finding vertex sequence...")
904
905 # This is based on "A Fast & Effective Heuristic for the Feedback
906 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
907 # it as starting with the digraph G and moving all the vertices to
908 # be on a horizontal line in some order, trying to minimize the
909 # number of edges that end up pointing to the left. Left-pointing
910 # edges will get removed to turn the digraph into a DAG. In this
911 # case each edge has a weight which is the number of source blocks
912 # we'll lose if that edge is removed; we try to minimize the total
913 # weight rather than just the number of edges.
914
915 # Make a copy of the edge set; this copy will get destroyed by the
916 # algorithm.
917 for xf in self.transfers:
918 xf.incoming = xf.goes_after.copy()
919 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -0800920 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -0700921
922 # We use an OrderedDict instead of just a set so that the output
923 # is repeatable; otherwise it would depend on the hash values of
924 # the transfer objects.
925 G = OrderedDict()
926 for xf in self.transfers:
927 G[xf] = None
928 s1 = deque() # the left side of the sequence, built from left to right
929 s2 = deque() # the right side of the sequence, built from right to left
930
Doug Zongker6ab2a502016-02-09 08:28:09 -0800931 heap = []
932 for xf in self.transfers:
933 xf.heap_item = HeapItem(xf)
934 heap.append(xf.heap_item)
935 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700936
Doug Zongker6ab2a502016-02-09 08:28:09 -0800937 sinks = set(u for u in G if not u.outgoing)
938 sources = set(u for u in G if not u.incoming)
939
940 def adjust_score(iu, delta):
941 iu.score += delta
942 iu.heap_item.clear()
943 iu.heap_item = HeapItem(iu)
944 heapq.heappush(heap, iu.heap_item)
945
946 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700947 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800948 while sinks:
949 new_sinks = set()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700950 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800951 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700952 s2.appendleft(u)
953 del G[u]
954 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800955 adjust_score(iu, -iu.outgoing.pop(u))
956 if not iu.outgoing: new_sinks.add(iu)
957 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -0700958
959 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800960 while sources:
961 new_sources = set()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700962 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800963 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700964 s1.append(u)
965 del G[u]
966 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800967 adjust_score(iu, +iu.incoming.pop(u))
968 if not iu.incoming: new_sources.add(iu)
969 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -0700970
Doug Zongker6ab2a502016-02-09 08:28:09 -0800971 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700972
973 # Find the "best" vertex to put next. "Best" is the one that
974 # maximizes the net difference in source blocks saved we get by
975 # pretending it's a source rather than a sink.
976
Doug Zongker6ab2a502016-02-09 08:28:09 -0800977 while True:
978 u = heapq.heappop(heap)
979 if u and u.item in G:
980 u = u.item
981 break
Doug Zongkerfc44a512014-08-26 13:10:25 -0700982
Doug Zongkerfc44a512014-08-26 13:10:25 -0700983 s1.append(u)
984 del G[u]
985 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800986 adjust_score(iu, +iu.incoming.pop(u))
987 if not iu.incoming: sources.add(iu)
988
Doug Zongkerfc44a512014-08-26 13:10:25 -0700989 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800990 adjust_score(iu, -iu.outgoing.pop(u))
991 if not iu.outgoing: sinks.add(iu)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700992
993 # Now record the sequence in the 'order' field of each transfer,
994 # and by rearranging self.transfers to be in the chosen sequence.
995
996 new_transfers = []
997 for x in itertools.chain(s1, s2):
998 x.order = len(new_transfers)
999 new_transfers.append(x)
1000 del x.incoming
1001 del x.outgoing
1002
1003 self.transfers = new_transfers
1004
1005 def GenerateDigraph(self):
1006 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001007
1008 # Each item of source_ranges will be:
1009 # - None, if that block is not used as a source,
1010 # - a transfer, if one transfer uses it as a source, or
1011 # - a set of transfers.
1012 source_ranges = []
1013 for b in self.transfers:
1014 for s, e in b.src_ranges:
1015 if e > len(source_ranges):
1016 source_ranges.extend([None] * (e-len(source_ranges)))
1017 for i in range(s, e):
1018 if source_ranges[i] is None:
1019 source_ranges[i] = b
1020 else:
1021 if not isinstance(source_ranges[i], set):
1022 source_ranges[i] = set([source_ranges[i]])
1023 source_ranges[i].add(b)
1024
Doug Zongkerfc44a512014-08-26 13:10:25 -07001025 for a in self.transfers:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001026 intersections = set()
1027 for s, e in a.tgt_ranges:
1028 for i in range(s, e):
1029 if i >= len(source_ranges): break
1030 b = source_ranges[i]
1031 if b is not None:
1032 if isinstance(b, set):
1033 intersections.update(b)
1034 else:
1035 intersections.add(b)
1036
1037 for b in intersections:
1038 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001039
1040 # If the blocks written by A are read by B, then B needs to go before A.
1041 i = a.tgt_ranges.intersect(b.src_ranges)
1042 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001043 if b.src_name == "__ZERO":
1044 # the cost of removing source blocks for the __ZERO domain
1045 # is (nearly) zero.
1046 size = 0
1047 else:
1048 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001049 b.goes_before[a] = size
1050 a.goes_after[b] = size
1051
1052 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001053 """Parse the file_map to generate all the transfers."""
1054
1055 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1056 split=False):
1057 """Wrapper function for adding a Transfer().
1058
1059 For BBOTA v3, we need to stash source blocks for resumable feature.
1060 However, with the growth of file size and the shrink of the cache
1061 partition source blocks are too large to be stashed. If a file occupies
1062 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
1063 into smaller pieces by getting multiple Transfer()s.
1064
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001065 The downside is that after splitting, we may increase the package size
1066 since the split pieces don't align well. According to our experiments,
1067 1/8 of the cache size as the per-piece limit appears to be optimal.
1068 Compared to the fixed 1024-block limit, it reduces the overall package
1069 size by 30% volantis, and 20% for angler and bullhead."""
Tao Bao9a5caf22015-08-25 15:10:10 -07001070
1071 # We care about diff transfers only.
1072 if style != "diff" or not split:
1073 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1074 return
1075
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001076 pieces = 0
1077 cache_size = common.OPTIONS.cache_size
1078 split_threshold = 0.125
1079 max_blocks_per_transfer = int(cache_size * split_threshold /
1080 self.tgt.blocksize)
1081
Tao Bao9a5caf22015-08-25 15:10:10 -07001082 # Change nothing for small files.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001083 if (tgt_ranges.size() <= max_blocks_per_transfer and
1084 src_ranges.size() <= max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001085 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1086 return
1087
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001088 while (tgt_ranges.size() > max_blocks_per_transfer and
1089 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001090 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1091 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001092 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1093 src_first = src_ranges.first(max_blocks_per_transfer)
1094
Tao Bao9a5caf22015-08-25 15:10:10 -07001095 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1096 by_id)
1097
1098 tgt_ranges = tgt_ranges.subtract(tgt_first)
1099 src_ranges = src_ranges.subtract(src_first)
1100 pieces += 1
1101
1102 # Handle remaining blocks.
1103 if tgt_ranges.size() or src_ranges.size():
1104 # Must be both non-empty.
1105 assert tgt_ranges.size() and src_ranges.size()
1106 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1107 src_split_name = "%s-%d" % (src_name, pieces)
1108 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1109 by_id)
1110
Doug Zongkerfc44a512014-08-26 13:10:25 -07001111 empty = RangeSet()
1112 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1113 if tgt_fn == "__ZERO":
1114 # the special "__ZERO" domain is all the blocks not contained
1115 # in any file and that are filled with zeros. We have a
1116 # special transfer style for zero blocks.
1117 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001118 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1119 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001120 continue
1121
Tao Baoff777812015-05-12 11:42:31 -07001122 elif tgt_fn == "__COPY":
1123 # "__COPY" domain includes all the blocks not contained in any
1124 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001125 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001126 continue
1127
Doug Zongkerfc44a512014-08-26 13:10:25 -07001128 elif tgt_fn in self.src.file_map:
1129 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001130 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1131 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001132 continue
1133
1134 b = os.path.basename(tgt_fn)
1135 if b in self.src_basenames:
1136 # Look for an exact basename match in the source.
1137 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001138 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1139 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001140 continue
1141
1142 b = re.sub("[0-9]+", "#", b)
1143 if b in self.src_numpatterns:
1144 # Look for a 'number pattern' match (a basename match after
1145 # all runs of digits are replaced by "#"). (This is useful
1146 # for .so files that contain version numbers in the filename
1147 # that get bumped.)
1148 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001149 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1150 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001151 continue
1152
Tao Bao9a5caf22015-08-25 15:10:10 -07001153 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001154
1155 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001156 for k in self.src.file_map.keys():
1157 b = os.path.basename(k)
1158 self.src_basenames[b] = k
1159 b = re.sub("[0-9]+", "#", b)
1160 self.src_numpatterns[b] = k
1161
1162 @staticmethod
1163 def AssertPartition(total, seq):
1164 """Assert that all the RangeSets in 'seq' form a partition of the
1165 'total' RangeSet (ie, they are nonintersecting and their union
1166 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001167
Doug Zongkerfc44a512014-08-26 13:10:25 -07001168 so_far = RangeSet()
1169 for i in seq:
1170 assert not so_far.overlaps(i)
1171 so_far = so_far.union(i)
1172 assert so_far == total