# Copyright (C) 2014 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import array
Tao Bao8dcf7382015-05-21 14:09:49 -070020import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080021import functools
Doug Zongker62338182014-09-08 08:29:55 -070022import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import itertools
24import multiprocessing
25import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongker6ab2a502016-02-09 08:28:09 -080029import time
Doug Zongkerfc44a512014-08-26 13:10:25 -070030import tempfile
31
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms src into tgt.

  Args:
    src: an iterable of strings; concatenated, the source data.
    tgt: an iterable of strings; concatenated, the target data.
    imgdiff: if True, run "imgdiff -z" (better for zip-format data);
        otherwise run "bsdiff".

  Returns:
    The patch contents as a string.

  Raises:
    ValueError: if the external diff tool exits with a non-zero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    # The diff tools create the patch file themselves; remove the
    # placeholder mkstemp created so they never see a stale file.
    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Discard imgdiff's output.  Open /dev/null in a `with` block so the
      # handle is closed after the call (the original leaked it).
      with open("/dev/null", "a") as devnull:
        ret = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                              stdout=devnull,
                              stderr=subprocess.STDOUT)
    else:
      ret = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if ret:
      raise ValueError("diff failed: " + str(ret))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    try:
      os.unlink(srcfile)
      os.unlink(tgtfile)
      os.unlink(patchfile)
    except OSError:
      pass
75
Dan Albert8b72aef2015-03-23 19:13:21 -070076
77class Image(object):
78 def ReadRangeSet(self, ranges):
79 raise NotImplementedError
80
Tao Bao68658c02015-06-01 13:40:49 -070081 def TotalSha1(self, include_clobbered_blocks=False):
Dan Albert8b72aef2015-03-23 19:13:21 -070082 raise NotImplementedError
83
84
class EmptyImage(Image):
  """A zero-length image: no blocks, no data."""
  blocksize = 4096
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  total_blocks = 0
  file_map = {}

  def ReadRangeSet(self, ranges):
    # Nothing to read -- every range maps to no data.
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of zero bytes of data.

    EmptyImage always carries empty clobbered_blocks, so the
    include_clobbered_blocks flag makes no difference here.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
100
101
class DataImage(Image):
  """An image wrapped around a single string of data."""

  def __init__(self, data, trim=False, pad=False):
    """Wrap `data` as an image of 4k blocks.

    Args:
      data: the raw image contents as a string.
      trim: if True, drop a trailing partial block.
      pad: if True, zero-fill a trailing partial block.

    Raises:
      ValueError: if data is not block-aligned and neither trim nor pad
          is requested.
    """
    self.data = data
    self.blocksize = 4096

    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use floor division so total_blocks is always an int (true division
    # would yield a float on Python 3 / "from __future__ import division").
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Store as a RangeSet: TotalSha1() below passes it to
    # care_map.subtract(), and the other Image implementations expose
    # clobbered_blocks as a RangeSet (a bare list would crash there).
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # The padded last block (if any) is classified as __COPY below, so
    # skip it when splitting blocks into zero/nonzero.
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return the data for each (start, end) block range as a list."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image data.

    If include_clobbered_blocks is False, the (padded) clobbered blocks
    are excluded from the hash.
    """
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # Feed the chunks into the hash one at a time; sha1() itself does
      # not accept a list (the original sha1(self.ReadRangeSet(ranges))
      # raised TypeError).
      ctx = sha1()
      for piece in self.ReadRangeSet(ranges):
        ctx.update(piece)
      return ctx.hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700172
Doug Zongkerfc44a512014-08-26 13:10:25 -0700173
class Transfer(object):
  """One transfer-list command: produce tgt_ranges from src_ranges."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # "Intact" means both range sets are monotonic: reading the blocks in
    # order reproduces the underlying data in order.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register in by_id; the index doubles as this transfer's id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Blocks this command adds to the stash minus blocks it frees."""
    stored = sum(sr.size() for (_, sr) in self.stash_before)
    consumed = sum(sr.size() for (_, sr) in self.use_stash)
    return stored - consumed

  def ConvertToNew(self):
    """Rewrite this command as a plain "new", dropping all source use."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%s: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
208
209
@functools.total_ordering
class HeapItem(object):
  """Wraps a transfer for use on a heap, ordered by descending score.

  Entries can be invalidated in place with clear(); truth testing tells
  whether the entry is still live, so stale entries can be skipped when
  popped instead of being removed from the middle of the heap.
  """
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    """Invalidate this entry without removing it from the heap."""
    self.item = None

  def __bool__(self):
    # Fixed: a live entry must be truthy.  The original returned
    # `self.item is None`, which is inverted -- cleared entries looked
    # live and live entries looked cleared.
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing, not __bool__; alias it
  # so the fix is effective on both.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
225
226
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# BlockImageDiff works on two image objects. An image object is
228# anything that provides the following attributes:
229#
230# blocksize: the size in bytes of a block, currently must be 4096.
231#
232# total_blocks: the total size of the partition/image, in blocks.
233#
234# care_map: a RangeSet containing which blocks (in the range [0,
235# total_blocks) we actually care about; i.e. which blocks contain
236# data.
237#
238# file_map: a dict that partitions the blocks contained in care_map
239# into smaller domains that are useful for doing diffs on.
240# (Typically a domain is a file, and the key in file_map is the
241# pathname.)
242#
Tao Baoff777812015-05-12 11:42:31 -0700243# clobbered_blocks: a RangeSet containing which blocks contain data
244# but may be altered by the FS. They need to be excluded when
245# verifying the partition integrity.
246#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247# ReadRangeSet(): a function that takes a RangeSet and returns the
248# data contained in the image blocks of that RangeSet. The data
249# is returned as a list or tuple of strings; concatenating the
250# elements together should produce the requested data.
251# Implementations are free to break up the data into list/tuple
252# elements in any way that is convenient.
253#
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700254# TotalSha1(): a function that returns (as a hex string) the SHA-1
255# hash of all the data in the image (ie, all the blocks in the
Tao Bao68658c02015-06-01 13:40:49 -0700256# care_map minus clobbered_blocks, or including the clobbered
257# blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# When creating a BlockImageDiff, the src image may be None, in which
260# case the list of transfers produced will never read from the
261# original image.
262
263class BlockImageDiff(object):
Tao Baoeba409c2015-10-21 13:30:43 -0700264 def __init__(self, tgt, src=None, threads=None, version=4):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700265 if threads is None:
266 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700267 if threads == 0:
268 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700269 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700270 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700271 self.transfers = []
272 self.src_basenames = {}
273 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800274 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700275 self.touched_src_ranges = RangeSet()
276 self.touched_src_sha1 = None
Doug Zongker62338182014-09-08 08:29:55 -0700277
Tao Baoeba409c2015-10-21 13:30:43 -0700278 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700279
280 self.tgt = tgt
281 if src is None:
282 src = EmptyImage()
283 self.src = src
284
285 # The updater code that installs the patch always uses 4k blocks.
286 assert tgt.blocksize == 4096
287 assert src.blocksize == 4096
288
289 # The range sets in each filemap should comprise a partition of
290 # the care map.
291 self.AssertPartition(src.care_map, src.file_map.values())
292 self.AssertPartition(tgt.care_map, tgt.file_map.values())
293
  @property
  def max_stashed_size(self):
    # Peak number of bytes stashed at once during the update, as computed
    # by WriteTransfers(); stays 0 until then (and for version 1, which
    # does not stash).
    return self._max_stashed_size
297
  def Compute(self, prefix):
    """Run the full diff pipeline and write the output files.

    Produces prefix+".transfer.list" (via WriteTransfers) plus the patch
    and new-data files (via ComputePatches).
    """
    # When looking for a source file to use as the diff input for a
    # target file, we try:
    #   1) an exact path match if available, otherwise
    #   2) a exact basename match if available, otherwise
    #   3) a basename match after all runs of digits are replaced by
    #      "#" if available, otherwise
    #   4) we have no source for this target.
    self.AbbreviateSourceNames()
    self.FindTransfers()

    # Find the ordering dependencies among transfers (this is O(n^2)
    # in the number of transfers).
    self.GenerateDigraph()
    # Find a sequence of transfers that satisfies as many ordering
    # dependencies as possible (heuristically).
    self.FindVertexSequence()
    # Fix up the ordering dependencies that the sequence didn't
    # satisfy.
    if self.version == 1:
      self.RemoveBackwardEdges()
    else:
      # v2+ can stash, so backward edges are reversed (via stashes)
      # rather than dropped, then the order is polished further.
      self.ReverseBackwardEdges()
      self.ImproveVertexSequence()

    # Ensure the runtime stash size is under the limit.
    if self.version >= 2 and common.OPTIONS.cache_size is not None:
      self.ReviseStashSize()

    # Double-check our work.
    self.AssertSequenceGood()

    self.ComputePatches(prefix)
    self.WriteTransfers(prefix)
332
Dan Albert8b72aef2015-03-23 19:13:21 -0700333 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000334 data = source.ReadRangeSet(ranges)
335 ctx = sha1()
336
337 for p in data:
338 ctx.update(p)
339
340 return ctx.hexdigest()
341
  def WriteTransfers(self, prefix):
    """Serialize self.transfers to <prefix>.transfer.list.

    Walks the transfer list in order, emitting one text command per
    transfer in the versioned format, while simulating stash usage so
    the header can report the number of stash slots and the peak stash
    size.  For version 3+ it also records touched_src_ranges and
    touched_src_sha1.  Finally it emits "erase" commands for blocks that
    carry no useful data in the new image.
    """
    out = []

    total = 0

    # Simulated stash state: v2 keys stashes by integer slot id; v3+ keys
    # them by content hash with a reference count (identical ranges share
    # one stash).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        # Version 1 has no stash support at all.
        assert not xf.stash_before
        assert not xf.use_stash

      # Emit the stash commands this transfer needs before it runs,
      # reusing freed slot ids when possible.
      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        if self.version == 2:
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            # Same content already stashed; just bump the refcount.
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            self.touched_src_ranges = self.touched_src_ranges.union(sr)
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        # <# blocks> <src ranges>
        #   OR
        # <# blocks> <src ranges> <src locs> <stash refs...>
        #   OR
        # <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          # Express the stashed range in coordinates relative to this
          # transfer's source ranges.
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              # Last reference to this content gone; free the stash.
              free_size += sr.size()
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source equals its target is a no-op on the device;
        # emit nothing and count nothing.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            self.touched_src_ranges = self.touched_src_ranges.union(
                xf.src_ranges)

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    if self.version >= 3:
      self.touched_src_sha1 = self.HashBlocks(
          self.src, self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, self._max_stashed_size, max_allowed,
                  self._max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700600
  def ReviseStashSize(self):
    """Keep the simulated runtime stash usage under the cache limit.

    Simulates stash usage over the ordered transfer list; whenever a
    transfer would push the stash beyond the /cache-based limit, the
    command that would consume that stash is converted to a plain "new"
    command (losing the space saving but staying within the limit).
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))

        # Add up the blocks that violate the space limit, to report the
        # total on screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700673
Doug Zongkerfc44a512014-08-26 13:10:25 -0700674 def ComputePatches(self, prefix):
675 print("Reticulating splines...")
676 diff_q = []
677 patch_num = 0
678 with open(prefix + ".new.dat", "wb") as new_f:
679 for xf in self.transfers:
680 if xf.style == "zero":
681 pass
682 elif xf.style == "new":
683 for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
684 new_f.write(piece)
685 elif xf.style == "diff":
686 src = self.src.ReadRangeSet(xf.src_ranges)
687 tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)
688
689 # We can't compare src and tgt directly because they may have
690 # the same content but be broken up into blocks differently, eg:
691 #
692 # ["he", "llo"] vs ["h", "ello"]
693 #
694 # We want those to compare equal, ideally without having to
695 # actually concatenate the strings (these may be tens of
696 # megabytes).
697
698 src_sha1 = sha1()
699 for p in src:
700 src_sha1.update(p)
701 tgt_sha1 = sha1()
702 tgt_size = 0
703 for p in tgt:
704 tgt_sha1.update(p)
705 tgt_size += len(p)
706
707 if src_sha1.digest() == tgt_sha1.digest():
708 # These are identical; we don't need to generate a patch,
709 # just issue copy commands on the device.
710 xf.style = "move"
711 else:
712 # For files in zip format (eg, APKs, JARs, etc.) we would
713 # like to use imgdiff -z if possible (because it usually
714 # produces significantly smaller patches than bsdiff).
715 # This is permissible if:
716 #
717 # - the source and target files are monotonic (ie, the
718 # data is stored with blocks in increasing order), and
719 # - we haven't removed any blocks from the source set.
720 #
721 # If these conditions are satisfied then appending all the
722 # blocks in the set together in order will produce a valid
723 # zip file (plus possibly extra zeros in the last block),
724 # which is what imgdiff needs to operate. (imgdiff is
725 # fine with extra zeros at the end of the file.)
726 imgdiff = (xf.intact and
727 xf.tgt_name.split(".")[-1].lower()
728 in ("apk", "jar", "zip"))
729 xf.style = "imgdiff" if imgdiff else "bsdiff"
730 diff_q.append((tgt_size, src, tgt, xf, patch_num))
731 patch_num += 1
732
733 else:
734 assert False, "unknown style " + xf.style
735
736 if diff_q:
737 if self.threads > 1:
738 print("Computing patches (using %d threads)..." % (self.threads,))
739 else:
740 print("Computing patches...")
741 diff_q.sort()
742
743 patches = [None] * patch_num
744
Dan Albert8b72aef2015-03-23 19:13:21 -0700745 # TODO: Rewrite with multiprocessing.ThreadPool?
Doug Zongkerfc44a512014-08-26 13:10:25 -0700746 lock = threading.Lock()
747 def diff_worker():
748 while True:
749 with lock:
Dan Albert8b72aef2015-03-23 19:13:21 -0700750 if not diff_q:
751 return
Doug Zongkerfc44a512014-08-26 13:10:25 -0700752 tgt_size, src, tgt, xf, patchnum = diff_q.pop()
753 patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
754 size = len(patch)
755 with lock:
756 patches[patchnum] = (patch, xf)
757 print("%10d %10d (%6.2f%%) %7s %s" % (
758 size, tgt_size, size * 100.0 / tgt_size, xf.style,
759 xf.tgt_name if xf.tgt_name == xf.src_name else (
760 xf.tgt_name + " (from " + xf.src_name + ")")))
761
762 threads = [threading.Thread(target=diff_worker)
Dan Albert8b72aef2015-03-23 19:13:21 -0700763 for _ in range(self.threads)]
Doug Zongkerfc44a512014-08-26 13:10:25 -0700764 for th in threads:
765 th.start()
766 while threads:
767 threads.pop().join()
768 else:
769 patches = []
770
771 p = 0
772 with open(prefix + ".patch.dat", "wb") as patch_f:
773 for patch, xf in patches:
774 xf.patch_start = p
775 xf.patch_len = len(patch)
776 patch_f.write(patch)
777 p += len(patch)
778
779 def AssertSequenceGood(self):
780 # Simulate the sequences of transfers we will output, and check that:
781 # - we never read a block after writing it, and
782 # - we write every block we care about exactly once.
783
784 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800785 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700786
787 # Imagine processing the transfers in order.
788 for xf in self.transfers:
789 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700790
791 x = xf.src_ranges
792 if self.version >= 2:
793 for _, sr in xf.use_stash:
794 x = x.subtract(sr)
795
Doug Zongker6ab2a502016-02-09 08:28:09 -0800796 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800797 # Source image could be larger. Don't check the blocks that are in the
798 # source image only. Since they are not in 'touched', and won't ever
799 # be touched.
800 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800801 assert touched[i] == 0
802
803 # Check that the output blocks for this transfer haven't yet
804 # been touched, and touch all the blocks written by this
805 # transfer.
806 for s, e in xf.tgt_ranges:
807 for i in range(s, e):
808 assert touched[i] == 0
809 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700810
811 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800812 for s, e in self.tgt.care_map:
813 for i in range(s, e):
814 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700815
Doug Zongker62338182014-09-08 08:29:55 -0700816 def ImproveVertexSequence(self):
817 print("Improving vertex order...")
818
819 # At this point our digraph is acyclic; we reversed any edges that
820 # were backwards in the heuristically-generated sequence. The
821 # previously-generated order is still acceptable, but we hope to
822 # find a better order that needs less memory for stashed data.
823 # Now we do a topological sort to generate a new vertex order,
824 # using a greedy algorithm to choose which vertex goes next
825 # whenever we have a choice.
826
827 # Make a copy of the edge set; this copy will get destroyed by the
828 # algorithm.
829 for xf in self.transfers:
830 xf.incoming = xf.goes_after.copy()
831 xf.outgoing = xf.goes_before.copy()
832
833 L = [] # the new vertex order
834
835 # S is the set of sources in the remaining graph; we always choose
836 # the one that leaves the least amount of stashed data after it's
837 # executed.
838 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
839 if not u.incoming]
840 heapq.heapify(S)
841
842 while S:
843 _, _, xf = heapq.heappop(S)
844 L.append(xf)
845 for u in xf.outgoing:
846 del u.incoming[xf]
847 if not u.incoming:
848 heapq.heappush(S, (u.NetStashChange(), u.order, u))
849
850 # if this fails then our graph had a cycle.
851 assert len(L) == len(self.transfers)
852
853 self.transfers = L
854 for i, xf in enumerate(L):
855 xf.order = i
856
Doug Zongkerfc44a512014-08-26 13:10:25 -0700857 def RemoveBackwardEdges(self):
858 print("Removing backward edges...")
859 in_order = 0
860 out_of_order = 0
861 lost_source = 0
862
863 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700864 lost = 0
865 size = xf.src_ranges.size()
866 for u in xf.goes_before:
867 # xf should go before u
868 if xf.order < u.order:
869 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700870 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700871 else:
872 # it doesn't, boo. trim the blocks that u writes from xf's
873 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700874 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700875 assert xf.src_ranges.overlaps(u.tgt_ranges)
876 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
877 xf.intact = False
878
879 if xf.style == "diff" and not xf.src_ranges:
880 # nothing left to diff from; treat as new data
881 xf.style = "new"
882
883 lost = size - xf.src_ranges.size()
884 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700885
886 print((" %d/%d dependencies (%.2f%%) were violated; "
887 "%d source blocks removed.") %
888 (out_of_order, in_order + out_of_order,
889 (out_of_order * 100.0 / (in_order + out_of_order))
890 if (in_order + out_of_order) else 0.0,
891 lost_source))
892
Doug Zongker62338182014-09-08 08:29:55 -0700893 def ReverseBackwardEdges(self):
894 print("Reversing backward edges...")
895 in_order = 0
896 out_of_order = 0
897 stashes = 0
898 stash_size = 0
899
900 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700901 for u in xf.goes_before.copy():
902 # xf should go before u
903 if xf.order < u.order:
904 # it does, hurray!
905 in_order += 1
906 else:
907 # it doesn't, boo. modify u to stash the blocks that it
908 # writes that xf wants to read, and then require u to go
909 # before xf.
910 out_of_order += 1
911
912 overlap = xf.src_ranges.intersect(u.tgt_ranges)
913 assert overlap
914
915 u.stash_before.append((stashes, overlap))
916 xf.use_stash.append((stashes, overlap))
917 stashes += 1
918 stash_size += overlap.size()
919
920 # reverse the edge direction; now xf must go after u
921 del xf.goes_before[u]
922 del u.goes_after[xf]
923 xf.goes_after[u] = None # value doesn't matter
924 u.goes_before[xf] = None
925
926 print((" %d/%d dependencies (%.2f%%) were violated; "
927 "%d source blocks stashed.") %
928 (out_of_order, in_order + out_of_order,
929 (out_of_order * 100.0 / (in_order + out_of_order))
930 if (in_order + out_of_order) else 0.0,
931 stash_size))
932
Doug Zongkerfc44a512014-08-26 13:10:25 -0700933 def FindVertexSequence(self):
934 print("Finding vertex sequence...")
935
936 # This is based on "A Fast & Effective Heuristic for the Feedback
937 # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
938 # it as starting with the digraph G and moving all the vertices to
939 # be on a horizontal line in some order, trying to minimize the
940 # number of edges that end up pointing to the left. Left-pointing
941 # edges will get removed to turn the digraph into a DAG. In this
942 # case each edge has a weight which is the number of source blocks
943 # we'll lose if that edge is removed; we try to minimize the total
944 # weight rather than just the number of edges.
945
946 # Make a copy of the edge set; this copy will get destroyed by the
947 # algorithm.
948 for xf in self.transfers:
949 xf.incoming = xf.goes_after.copy()
950 xf.outgoing = xf.goes_before.copy()
Doug Zongker6ab2a502016-02-09 08:28:09 -0800951 xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())
Doug Zongkerfc44a512014-08-26 13:10:25 -0700952
953 # We use an OrderedDict instead of just a set so that the output
954 # is repeatable; otherwise it would depend on the hash values of
955 # the transfer objects.
956 G = OrderedDict()
957 for xf in self.transfers:
958 G[xf] = None
959 s1 = deque() # the left side of the sequence, built from left to right
960 s2 = deque() # the right side of the sequence, built from right to left
961
Doug Zongker6ab2a502016-02-09 08:28:09 -0800962 heap = []
963 for xf in self.transfers:
964 xf.heap_item = HeapItem(xf)
965 heap.append(xf.heap_item)
966 heapq.heapify(heap)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700967
Doug Zongker6ab2a502016-02-09 08:28:09 -0800968 sinks = set(u for u in G if not u.outgoing)
969 sources = set(u for u in G if not u.incoming)
970
971 def adjust_score(iu, delta):
972 iu.score += delta
973 iu.heap_item.clear()
974 iu.heap_item = HeapItem(iu)
975 heapq.heappush(heap, iu.heap_item)
976
977 while G:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700978 # Put all sinks at the end of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800979 while sinks:
980 new_sinks = set()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700981 for u in sinks:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800982 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700983 s2.appendleft(u)
984 del G[u]
985 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800986 adjust_score(iu, -iu.outgoing.pop(u))
987 if not iu.outgoing: new_sinks.add(iu)
988 sinks = new_sinks
Doug Zongkerfc44a512014-08-26 13:10:25 -0700989
990 # Put all the sources at the beginning of the sequence.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800991 while sources:
992 new_sources = set()
Doug Zongkerfc44a512014-08-26 13:10:25 -0700993 for u in sources:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800994 if u not in G: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -0700995 s1.append(u)
996 del G[u]
997 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -0800998 adjust_score(iu, +iu.incoming.pop(u))
999 if not iu.incoming: new_sources.add(iu)
1000 sources = new_sources
Doug Zongkerfc44a512014-08-26 13:10:25 -07001001
Doug Zongker6ab2a502016-02-09 08:28:09 -08001002 if not G: break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001003
1004 # Find the "best" vertex to put next. "Best" is the one that
1005 # maximizes the net difference in source blocks saved we get by
1006 # pretending it's a source rather than a sink.
1007
Doug Zongker6ab2a502016-02-09 08:28:09 -08001008 while True:
1009 u = heapq.heappop(heap)
1010 if u and u.item in G:
1011 u = u.item
1012 break
Doug Zongkerfc44a512014-08-26 13:10:25 -07001013
Doug Zongkerfc44a512014-08-26 13:10:25 -07001014 s1.append(u)
1015 del G[u]
1016 for iu in u.outgoing:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001017 adjust_score(iu, +iu.incoming.pop(u))
1018 if not iu.incoming: sources.add(iu)
1019
Doug Zongkerfc44a512014-08-26 13:10:25 -07001020 for iu in u.incoming:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001021 adjust_score(iu, -iu.outgoing.pop(u))
1022 if not iu.outgoing: sinks.add(iu)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001023
1024 # Now record the sequence in the 'order' field of each transfer,
1025 # and by rearranging self.transfers to be in the chosen sequence.
1026
1027 new_transfers = []
1028 for x in itertools.chain(s1, s2):
1029 x.order = len(new_transfers)
1030 new_transfers.append(x)
1031 del x.incoming
1032 del x.outgoing
1033
1034 self.transfers = new_transfers
1035
1036 def GenerateDigraph(self):
1037 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001038
1039 # Each item of source_ranges will be:
1040 # - None, if that block is not used as a source,
1041 # - a transfer, if one transfer uses it as a source, or
1042 # - a set of transfers.
1043 source_ranges = []
1044 for b in self.transfers:
1045 for s, e in b.src_ranges:
1046 if e > len(source_ranges):
1047 source_ranges.extend([None] * (e-len(source_ranges)))
1048 for i in range(s, e):
1049 if source_ranges[i] is None:
1050 source_ranges[i] = b
1051 else:
1052 if not isinstance(source_ranges[i], set):
1053 source_ranges[i] = set([source_ranges[i]])
1054 source_ranges[i].add(b)
1055
Doug Zongkerfc44a512014-08-26 13:10:25 -07001056 for a in self.transfers:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001057 intersections = set()
1058 for s, e in a.tgt_ranges:
1059 for i in range(s, e):
1060 if i >= len(source_ranges): break
1061 b = source_ranges[i]
1062 if b is not None:
1063 if isinstance(b, set):
1064 intersections.update(b)
1065 else:
1066 intersections.add(b)
1067
1068 for b in intersections:
1069 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001070
1071 # If the blocks written by A are read by B, then B needs to go before A.
1072 i = a.tgt_ranges.intersect(b.src_ranges)
1073 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001074 if b.src_name == "__ZERO":
1075 # the cost of removing source blocks for the __ZERO domain
1076 # is (nearly) zero.
1077 size = 0
1078 else:
1079 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001080 b.goes_before[a] = size
1081 a.goes_after[b] = size
1082
1083 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001084 """Parse the file_map to generate all the transfers."""
1085
1086 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1087 split=False):
1088 """Wrapper function for adding a Transfer().
1089
1090 For BBOTA v3, we need to stash source blocks for resumable feature.
1091 However, with the growth of file size and the shrink of the cache
1092 partition source blocks are too large to be stashed. If a file occupies
1093 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
1094 into smaller pieces by getting multiple Transfer()s.
1095
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001096 The downside is that after splitting, we may increase the package size
1097 since the split pieces don't align well. According to our experiments,
1098 1/8 of the cache size as the per-piece limit appears to be optimal.
1099 Compared to the fixed 1024-block limit, it reduces the overall package
1100 size by 30% volantis, and 20% for angler and bullhead."""
Tao Bao9a5caf22015-08-25 15:10:10 -07001101
1102 # We care about diff transfers only.
1103 if style != "diff" or not split:
1104 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1105 return
1106
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001107 pieces = 0
1108 cache_size = common.OPTIONS.cache_size
1109 split_threshold = 0.125
1110 max_blocks_per_transfer = int(cache_size * split_threshold /
1111 self.tgt.blocksize)
1112
Tao Bao9a5caf22015-08-25 15:10:10 -07001113 # Change nothing for small files.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001114 if (tgt_ranges.size() <= max_blocks_per_transfer and
1115 src_ranges.size() <= max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001116 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1117 return
1118
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001119 while (tgt_ranges.size() > max_blocks_per_transfer and
1120 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001121 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1122 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001123 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1124 src_first = src_ranges.first(max_blocks_per_transfer)
1125
Tao Bao9a5caf22015-08-25 15:10:10 -07001126 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1127 by_id)
1128
1129 tgt_ranges = tgt_ranges.subtract(tgt_first)
1130 src_ranges = src_ranges.subtract(src_first)
1131 pieces += 1
1132
1133 # Handle remaining blocks.
1134 if tgt_ranges.size() or src_ranges.size():
1135 # Must be both non-empty.
1136 assert tgt_ranges.size() and src_ranges.size()
1137 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1138 src_split_name = "%s-%d" % (src_name, pieces)
1139 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1140 by_id)
1141
Doug Zongkerfc44a512014-08-26 13:10:25 -07001142 empty = RangeSet()
1143 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1144 if tgt_fn == "__ZERO":
1145 # the special "__ZERO" domain is all the blocks not contained
1146 # in any file and that are filled with zeros. We have a
1147 # special transfer style for zero blocks.
1148 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001149 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1150 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001151 continue
1152
Tao Baoff777812015-05-12 11:42:31 -07001153 elif tgt_fn == "__COPY":
1154 # "__COPY" domain includes all the blocks not contained in any
1155 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001156 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001157 continue
1158
Doug Zongkerfc44a512014-08-26 13:10:25 -07001159 elif tgt_fn in self.src.file_map:
1160 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001161 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1162 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001163 continue
1164
1165 b = os.path.basename(tgt_fn)
1166 if b in self.src_basenames:
1167 # Look for an exact basename match in the source.
1168 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001169 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1170 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001171 continue
1172
1173 b = re.sub("[0-9]+", "#", b)
1174 if b in self.src_numpatterns:
1175 # Look for a 'number pattern' match (a basename match after
1176 # all runs of digits are replaced by "#"). (This is useful
1177 # for .so files that contain version numbers in the filename
1178 # that get bumped.)
1179 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001180 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1181 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001182 continue
1183
Tao Bao9a5caf22015-08-25 15:10:10 -07001184 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001185
1186 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001187 for k in self.src.file_map.keys():
1188 b = os.path.basename(k)
1189 self.src_basenames[b] = k
1190 b = re.sub("[0-9]+", "#", b)
1191 self.src_numpatterns[b] = k
1192
1193 @staticmethod
1194 def AssertPartition(total, seq):
1195 """Assert that all the RangeSets in 'seq' form a partition of the
1196 'total' RangeSet (ie, they are nonintersecting and their union
1197 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001198
Doug Zongkerfc44a512014-08-26 13:10:25 -07001199 so_far = RangeSet()
1200 for i in seq:
1201 assert not so_far.overlaps(i)
1202 so_far = so_far.union(i)
1203 assert so_far == total