blob: a7a4098f76ff603aca9dac8894460bbd5719a305 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16
17from collections import deque, OrderedDict
18from hashlib import sha1
Doug Zongker6ab2a502016-02-09 08:28:09 -080019import array
Tao Bao8dcf7382015-05-21 14:09:49 -070020import common
Doug Zongker6ab2a502016-02-09 08:28:09 -080021import functools
Doug Zongker62338182014-09-08 08:29:55 -070022import heapq
Doug Zongkerfc44a512014-08-26 13:10:25 -070023import itertools
24import multiprocessing
25import os
Doug Zongkerfc44a512014-08-26 13:10:25 -070026import re
27import subprocess
Doug Zongkerfc44a512014-08-26 13:10:25 -070028import threading
Doug Zongker6ab2a502016-02-09 08:28:09 -080029import time
Doug Zongkerfc44a512014-08-26 13:10:25 -070030import tempfile
31
Dan Albert8b72aef2015-03-23 19:13:21 -070032from rangelib import RangeSet
33
Doug Zongkerfc44a512014-08-26 13:10:25 -070034
Doug Zongkerab7ca1d2014-08-26 10:40:28 -070035__all__ = ["EmptyImage", "DataImage", "BlockImageDiff"]
36
Dan Albert8b72aef2015-03-23 19:13:21 -070037
def compute_patch(src, tgt, imgdiff=False):
  """Compute a binary patch that transforms src into tgt.

  Args:
    src: an iterable of strings; concatenated, the source data.
    tgt: an iterable of strings; concatenated, the target data.
    imgdiff: if True, run "imgdiff -z" (suitable for zip-formatted data);
        otherwise run "bsdiff".

  Returns:
    The patch contents as a string.

  Raises:
    ValueError: if the external diff tool exits with a nonzero status.
  """
  srcfd, srcfile = tempfile.mkstemp(prefix="src-")
  tgtfd, tgtfile = tempfile.mkstemp(prefix="tgt-")
  patchfd, patchfile = tempfile.mkstemp(prefix="patch-")
  os.close(patchfd)

  try:
    with os.fdopen(srcfd, "wb") as f_src:
      for p in src:
        f_src.write(p)

    with os.fdopen(tgtfd, "wb") as f_tgt:
      for p in tgt:
        f_tgt.write(p)

    # The diff tool creates the patch file itself; remove the placeholder
    # that mkstemp made so the tool starts from a clean slate.
    try:
      os.unlink(patchfile)
    except OSError:
      pass

    if imgdiff:
      # Use os.devnull (portable) and close the sink when done; the
      # original opened "/dev/null" without ever closing it, leaking a
      # file object per call.
      with open(os.devnull, "a") as devnull:
        ret = subprocess.call(["imgdiff", "-z", srcfile, tgtfile, patchfile],
                              stdout=devnull,
                              stderr=subprocess.STDOUT)
    else:
      ret = subprocess.call(["bsdiff", srcfile, tgtfile, patchfile])

    if ret:
      raise ValueError("diff failed: " + str(ret))

    with open(patchfile, "rb") as f:
      return f.read()
  finally:
    # Unlink each temp file independently so that one failure does not
    # leave the remaining files behind.
    for path in (srcfile, tgtfile, patchfile):
      try:
        os.unlink(path)
      except OSError:
        pass
75
Dan Albert8b72aef2015-03-23 19:13:21 -070076
class Image(object):
  """Abstract interface that BlockImageDiff expects of an image object."""

  def ReadRangeSet(self, ranges):
    """Return the data in `ranges` as an iterable of strings."""
    raise NotImplementedError

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of all the data in the image."""
    raise NotImplementedError
83
84
class EmptyImage(Image):
  """A zero-length image."""

  blocksize = 4096
  care_map = RangeSet()
  clobbered_blocks = RangeSet()
  extended = RangeSet()
  total_blocks = 0
  file_map = {}

  def ReadRangeSet(self, ranges):
    """There is no data to read; always return an empty tuple."""
    return ()

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the SHA-1 of no data at all.

    EmptyImage always carries empty clobbered_blocks, so
    include_clobbered_blocks can be ignored.
    """
    assert self.clobbered_blocks.size() == 0
    return sha1().hexdigest()
100
101
class DataImage(Image):
  """An image wrapped around a single string of data.

  The data may optionally be trimmed or zero-padded to a multiple of the
  4096-byte block size.
  """

  def __init__(self, data, trim=False, pad=False):
    self.data = data
    self.blocksize = 4096

    # trim and pad are mutually exclusive ways to handle a partial
    # last block.
    assert not (trim and pad)

    partial = len(self.data) % self.blocksize
    padded = False
    if partial > 0:
      if trim:
        self.data = self.data[:-partial]
      elif pad:
        self.data += '\0' * (self.blocksize - partial)
        padded = True
      else:
        raise ValueError(("data for DataImage must be multiple of %d bytes "
                          "unless trim or pad is specified") %
                         (self.blocksize,))

    assert len(self.data) % self.blocksize == 0

    # Use explicit floor division so the result stays an int under
    # "from __future__ import division" / Python 3 as well.
    self.total_blocks = len(self.data) // self.blocksize
    self.care_map = RangeSet(data=(0, self.total_blocks))
    # When the last block is padded, we always write the whole block even for
    # incremental OTAs. Because otherwise the last block may get skipped if
    # unchanged for an incremental, but would fail the post-install
    # verification if it has non-zero contents in the padding bytes.
    # Bug: 23828506
    if padded:
      clobbered_blocks = [self.total_blocks-1, self.total_blocks]
    else:
      clobbered_blocks = []
    # Store a RangeSet (not the raw list): TotalSha1() below and external
    # callers do RangeSet operations on it, and EmptyImage exposes a
    # RangeSet for the same attribute.
    self.clobbered_blocks = RangeSet(data=clobbered_blocks)
    self.extended = RangeSet()

    zero_blocks = []
    nonzero_blocks = []
    reference = '\0' * self.blocksize

    # A padded last block is excluded here; it is covered by "__COPY".
    for i in range(self.total_blocks-1 if padded else self.total_blocks):
      d = self.data[i*self.blocksize : (i+1)*self.blocksize]
      if d == reference:
        zero_blocks.append(i)
        zero_blocks.append(i+1)
      else:
        nonzero_blocks.append(i)
        nonzero_blocks.append(i+1)

    assert zero_blocks or nonzero_blocks or clobbered_blocks

    self.file_map = dict()
    if zero_blocks:
      self.file_map["__ZERO"] = RangeSet(data=zero_blocks)
    if nonzero_blocks:
      self.file_map["__NONZERO"] = RangeSet(data=nonzero_blocks)
    if clobbered_blocks:
      self.file_map["__COPY"] = RangeSet(data=clobbered_blocks)

  def ReadRangeSet(self, ranges):
    """Return the data in `ranges` as a list of per-range strings."""
    return [self.data[s*self.blocksize:e*self.blocksize] for (s, e) in ranges]

  def TotalSha1(self, include_clobbered_blocks=False):
    """Return the hex SHA-1 of the image data.

    By default the clobbered blocks are excluded; pass
    include_clobbered_blocks=True to hash everything.
    """
    if not include_clobbered_blocks:
      ranges = self.care_map.subtract(self.clobbered_blocks)
      # sha1() requires a single buffer; the original passed the list
      # returned by ReadRangeSet() directly, which raises TypeError.
      return sha1("".join(self.ReadRangeSet(ranges))).hexdigest()
    else:
      return sha1(self.data).hexdigest()
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700172
Doug Zongkerfc44a512014-08-26 13:10:25 -0700173
class Transfer(object):
  """One update command: produce tgt_ranges from src_ranges via `style`."""

  def __init__(self, tgt_name, src_name, tgt_ranges, src_ranges, style, by_id):
    self.tgt_name = tgt_name
    self.src_name = src_name
    self.tgt_ranges = tgt_ranges
    self.src_ranges = src_ranges
    self.style = style
    # imgdiff is only usable when both sides store their blocks in
    # increasing order; remember that here.
    tgt_monotonic = getattr(tgt_ranges, "monotonic", False)
    src_monotonic = getattr(src_ranges, "monotonic", False)
    self.intact = tgt_monotonic and src_monotonic

    # We use OrderedDict rather than dict so that the output is repeatable;
    # otherwise it would depend on the hash values of the Transfer objects.
    self.goes_before = OrderedDict()
    self.goes_after = OrderedDict()

    self.stash_before = []
    self.use_stash = []

    # Register this transfer: its id is its index in by_id.
    self.id = len(by_id)
    by_id.append(self)

  def NetStashChange(self):
    """Net number of blocks this command adds to the stash."""
    stored = sum(sr.size() for _, sr in self.stash_before)
    freed = sum(sr.size() for _, sr in self.use_stash)
    return stored - freed

  def ConvertToNew(self):
    """Turn this command into a "new" command that reads no source data."""
    assert self.style != "new"
    self.use_stash = []
    self.style = "new"
    self.src_ranges = RangeSet()

  def __str__(self):
    return "%d: <%s %s to %s>" % (
        self.id, self.src_ranges, self.style, self.tgt_ranges)
208
209
@functools.total_ordering
class HeapItem(object):
  """Wrapper that adapts a scored item to Python's min-heap.

  heapq implements a min-heap, while callers want the item with the
  maximum score first, so the score is negated on the way in. Entries
  are removed lazily: clear() marks an entry dead, and truth-testing
  tells whether the entry is still live.
  """
  def __init__(self, item):
    self.item = item
    # Negate the score since python's heap is a min-heap and we want
    # the maximum score.
    self.score = -item.score

  def clear(self):
    """Mark this heap entry as removed (lazy deletion)."""
    self.item = None

  def __bool__(self):
    # Bug fix: a live entry (one still holding an item) must be truthy.
    # The original test was inverted ("self.item is None"), which made
    # cleared entries look live and live entries look cleared.
    return self.item is not None

  # Python 2 uses __nonzero__ for truth testing; alias it so the fixed
  # semantics apply on both Python 2 and Python 3.
  __nonzero__ = __bool__

  def __eq__(self, other):
    return self.score == other.score

  def __le__(self, other):
    return self.score <= other.score
225
226
Doug Zongkerfc44a512014-08-26 13:10:25 -0700227# BlockImageDiff works on two image objects. An image object is
228# anything that provides the following attributes:
229#
230# blocksize: the size in bytes of a block, currently must be 4096.
231#
232# total_blocks: the total size of the partition/image, in blocks.
233#
234# care_map: a RangeSet containing which blocks (in the range [0,
235# total_blocks) we actually care about; i.e. which blocks contain
236# data.
237#
238# file_map: a dict that partitions the blocks contained in care_map
239# into smaller domains that are useful for doing diffs on.
240# (Typically a domain is a file, and the key in file_map is the
241# pathname.)
242#
Tao Baoff777812015-05-12 11:42:31 -0700243# clobbered_blocks: a RangeSet containing which blocks contain data
244# but may be altered by the FS. They need to be excluded when
245# verifying the partition integrity.
246#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700247# ReadRangeSet(): a function that takes a RangeSet and returns the
248# data contained in the image blocks of that RangeSet. The data
249# is returned as a list or tuple of strings; concatenating the
250# elements together should produce the requested data.
251# Implementations are free to break up the data into list/tuple
252# elements in any way that is convenient.
253#
# TotalSha1(): a function that returns (as a hex string) the SHA-1
#   hash of all the data in the image (i.e., all the blocks in the
#   care_map minus clobbered_blocks, or including the clobbered
#   blocks if include_clobbered_blocks is True).
Doug Zongkerab7ca1d2014-08-26 10:40:28 -0700258#
Doug Zongkerfc44a512014-08-26 13:10:25 -0700259# When creating a BlockImageDiff, the src image may be None, in which
260# case the list of transfers produced will never read from the
261# original image.
262
263class BlockImageDiff(object):
Tao Bao293fd132016-06-11 12:19:23 -0700264 def __init__(self, tgt, src=None, threads=None, version=4,
265 disable_imgdiff=False):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700266 if threads is None:
267 threads = multiprocessing.cpu_count() // 2
Dan Albert8b72aef2015-03-23 19:13:21 -0700268 if threads == 0:
269 threads = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700270 self.threads = threads
Doug Zongker62338182014-09-08 08:29:55 -0700271 self.version = version
Dan Albert8b72aef2015-03-23 19:13:21 -0700272 self.transfers = []
273 self.src_basenames = {}
274 self.src_numpatterns = {}
Tao Baob4cfca52016-02-04 14:26:02 -0800275 self._max_stashed_size = 0
Tao Baod522bdc2016-04-12 15:53:16 -0700276 self.touched_src_ranges = RangeSet()
277 self.touched_src_sha1 = None
Tao Bao293fd132016-06-11 12:19:23 -0700278 self.disable_imgdiff = disable_imgdiff
Doug Zongker62338182014-09-08 08:29:55 -0700279
Tao Baoeba409c2015-10-21 13:30:43 -0700280 assert version in (1, 2, 3, 4)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700281
282 self.tgt = tgt
283 if src is None:
284 src = EmptyImage()
285 self.src = src
286
287 # The updater code that installs the patch always uses 4k blocks.
288 assert tgt.blocksize == 4096
289 assert src.blocksize == 4096
290
291 # The range sets in each filemap should comprise a partition of
292 # the care map.
293 self.AssertPartition(src.care_map, src.file_map.values())
294 self.AssertPartition(tgt.care_map, tgt.file_map.values())
295
Tao Baob4cfca52016-02-04 14:26:02 -0800296 @property
297 def max_stashed_size(self):
298 return self._max_stashed_size
299
Doug Zongkerfc44a512014-08-26 13:10:25 -0700300 def Compute(self, prefix):
301 # When looking for a source file to use as the diff input for a
302 # target file, we try:
303 # 1) an exact path match if available, otherwise
304 # 2) a exact basename match if available, otherwise
305 # 3) a basename match after all runs of digits are replaced by
306 # "#" if available, otherwise
307 # 4) we have no source for this target.
308 self.AbbreviateSourceNames()
309 self.FindTransfers()
310
311 # Find the ordering dependencies among transfers (this is O(n^2)
312 # in the number of transfers).
313 self.GenerateDigraph()
314 # Find a sequence of transfers that satisfies as many ordering
315 # dependencies as possible (heuristically).
316 self.FindVertexSequence()
317 # Fix up the ordering dependencies that the sequence didn't
318 # satisfy.
Doug Zongker62338182014-09-08 08:29:55 -0700319 if self.version == 1:
320 self.RemoveBackwardEdges()
321 else:
322 self.ReverseBackwardEdges()
323 self.ImproveVertexSequence()
324
Tao Bao82c47982015-08-17 09:45:13 -0700325 # Ensure the runtime stash size is under the limit.
326 if self.version >= 2 and common.OPTIONS.cache_size is not None:
327 self.ReviseStashSize()
328
Doug Zongkerfc44a512014-08-26 13:10:25 -0700329 # Double-check our work.
330 self.AssertSequenceGood()
331
332 self.ComputePatches(prefix)
333 self.WriteTransfers(prefix)
334
Dan Albert8b72aef2015-03-23 19:13:21 -0700335 def HashBlocks(self, source, ranges): # pylint: disable=no-self-use
Sami Tolvanendd67a292014-12-09 16:40:34 +0000336 data = source.ReadRangeSet(ranges)
337 ctx = sha1()
338
339 for p in data:
340 ctx.update(p)
341
342 return ctx.hexdigest()
343
  def WriteTransfers(self, prefix):
    """Serialize the computed transfers into "<prefix>.transfer.list".

    Emits the versioned transfer-list format read by the updater: a
    header (format version, total blocks written, and -- for version >= 2
    -- the number of stash slots and the maximum stash size in blocks),
    followed by one command per line. Along the way this tracks peak
    stash usage in max_stashed_blocks and, for version >= 3, records the
    SHA-1 of every source block the update reads (touched_src_sha1).
    """
    out = []

    total = 0

    # For version 2, stashes maps a stash's key to its assigned slot id.
    # For version >= 3 it instead maps the stash's content hash to a
    # reference count (stashes are deduplicated by hash).
    stashes = {}
    stashed_blocks = 0
    max_stashed_blocks = 0

    # Released slot ids are kept in a min-heap so the smallest free id
    # is always reused first.
    free_stash_ids = []
    next_stash_id = 0

    for xf in self.transfers:

      if self.version < 2:
        assert not xf.stash_before
        assert not xf.use_stash

      # Emit the stash commands this transfer requires before it runs.
      for s, sr in xf.stash_before:
        assert s not in stashes
        if free_stash_ids:
          sid = heapq.heappop(free_stash_ids)
        else:
          sid = next_stash_id
          next_stash_id += 1
        stashes[s] = sid
        if self.version == 2:
          stashed_blocks += sr.size()
          out.append("stash %d %s\n" % (sid, sr.to_string_raw()))
        else:
          sh = self.HashBlocks(self.src, sr)
          if sh in stashes:
            stashes[sh] += 1
          else:
            stashes[sh] = 1
            stashed_blocks += sr.size()
            self.touched_src_ranges = self.touched_src_ranges.union(sr)
            out.append("stash %s %s\n" % (sh, sr.to_string_raw()))

      if stashed_blocks > max_stashed_blocks:
        max_stashed_blocks = stashed_blocks

      free_string = []
      free_size = 0

      if self.version == 1:
        src_str = xf.src_ranges.to_string_raw() if xf.src_ranges else ""
      elif self.version >= 2:

        # <# blocks> <src ranges>
        #   OR
        # <# blocks> <src ranges> <src locs> <stash refs...>
        #   OR
        # <# blocks> - <stash refs...>

        size = xf.src_ranges.size()
        src_str = [str(size)]

        unstashed_src_ranges = xf.src_ranges
        mapped_stashes = []
        for s, sr in xf.use_stash:
          sid = stashes.pop(s)
          unstashed_src_ranges = unstashed_src_ranges.subtract(sr)
          sh = self.HashBlocks(self.src, sr)
          sr = xf.src_ranges.map_within(sr)
          mapped_stashes.append(sr)
          if self.version == 2:
            src_str.append("%d:%s" % (sid, sr.to_string_raw()))
            # A stash will be used only once. We need to free the stash
            # immediately after the use, instead of waiting for the automatic
            # clean-up at the end. Because otherwise it may take up extra space
            # and lead to OTA failures.
            # Bug: 23119955
            free_string.append("free %d\n" % (sid,))
            free_size += sr.size()
          else:
            assert sh in stashes
            src_str.append("%s:%s" % (sh, sr.to_string_raw()))
            stashes[sh] -= 1
            if stashes[sh] == 0:
              free_size += sr.size()
              free_string.append("free %s\n" % (sh))
              stashes.pop(sh)
          heapq.heappush(free_stash_ids, sid)

        if unstashed_src_ranges:
          src_str.insert(1, unstashed_src_ranges.to_string_raw())
          if xf.use_stash:
            mapped_unstashed = xf.src_ranges.map_within(unstashed_src_ranges)
            src_str.insert(2, mapped_unstashed.to_string_raw())
            mapped_stashes.append(mapped_unstashed)
            self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)
        else:
          src_str.insert(1, "-")
          self.AssertPartition(RangeSet(data=(0, size)), mapped_stashes)

        src_str = " ".join(src_str)

      # all versions:
      #   zero <rangeset>
      #   new <rangeset>
      #   erase <rangeset>
      #
      # version 1:
      #   bsdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   imgdiff patchstart patchlen <src rangeset> <tgt rangeset>
      #   move <src rangeset> <tgt rangeset>
      #
      # version 2:
      #   bsdiff patchstart patchlen <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen <tgt rangeset> <src_str>
      #   move <tgt rangeset> <src_str>
      #
      # version 3:
      #   bsdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   imgdiff patchstart patchlen srchash tgthash <tgt rangeset> <src_str>
      #   move hash <tgt rangeset> <src_str>

      tgt_size = xf.tgt_ranges.size()

      if xf.style == "new":
        assert xf.tgt_ranges
        out.append("%s %s\n" % (xf.style, xf.tgt_ranges.to_string_raw()))
        total += tgt_size
      elif xf.style == "move":
        assert xf.tgt_ranges
        assert xf.src_ranges.size() == tgt_size
        # A move whose source already equals its target writes nothing.
        if xf.src_ranges != xf.tgt_ranges:
          if self.version == 1:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
          elif self.version == 2:
            out.append("%s %s %s\n" % (
                xf.style,
                xf.tgt_ranges.to_string_raw(), src_str))
          elif self.version >= 3:
            # take into account automatic stashing of overlapping blocks
            if xf.src_ranges.overlaps(xf.tgt_ranges):
              temp_stash_usage = stashed_blocks + xf.src_ranges.size()
              if temp_stash_usage > max_stashed_blocks:
                max_stashed_blocks = temp_stash_usage

            self.touched_src_ranges = self.touched_src_ranges.union(
                xf.src_ranges)

            out.append("%s %s %s %s\n" % (
                xf.style,
                self.HashBlocks(self.tgt, xf.tgt_ranges),
                xf.tgt_ranges.to_string_raw(), src_str))
          total += tgt_size
      elif xf.style in ("bsdiff", "imgdiff"):
        assert xf.tgt_ranges
        assert xf.src_ranges
        if self.version == 1:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.src_ranges.to_string_raw(), xf.tgt_ranges.to_string_raw()))
        elif self.version == 2:
          out.append("%s %d %d %s %s\n" % (
              xf.style, xf.patch_start, xf.patch_len,
              xf.tgt_ranges.to_string_raw(), src_str))
        elif self.version >= 3:
          # take into account automatic stashing of overlapping blocks
          if xf.src_ranges.overlaps(xf.tgt_ranges):
            temp_stash_usage = stashed_blocks + xf.src_ranges.size()
            if temp_stash_usage > max_stashed_blocks:
              max_stashed_blocks = temp_stash_usage

          self.touched_src_ranges = self.touched_src_ranges.union(
              xf.src_ranges)

          out.append("%s %d %d %s %s %s %s\n" % (
              xf.style,
              xf.patch_start, xf.patch_len,
              self.HashBlocks(self.src, xf.src_ranges),
              self.HashBlocks(self.tgt, xf.tgt_ranges),
              xf.tgt_ranges.to_string_raw(), src_str))
        total += tgt_size
      elif xf.style == "zero":
        assert xf.tgt_ranges
        to_zero = xf.tgt_ranges.subtract(xf.src_ranges)
        if to_zero:
          out.append("%s %s\n" % (xf.style, to_zero.to_string_raw()))
          total += to_zero.size()
      else:
        raise ValueError("unknown transfer style '%s'\n" % xf.style)

      if free_string:
        out.append("".join(free_string))
        stashed_blocks -= free_size

      if self.version >= 2 and common.OPTIONS.cache_size is not None:
        # Sanity check: abort if we're going to need more stash space than
        # the allowed size (cache_size * threshold). There are two purposes
        # of having a threshold here. a) Part of the cache may have been
        # occupied by some recovery logs. b) It will buy us some time to deal
        # with the oversize issue.
        cache_size = common.OPTIONS.cache_size
        stash_threshold = common.OPTIONS.stash_threshold
        max_allowed = cache_size * stash_threshold
        assert max_stashed_blocks * self.tgt.blocksize < max_allowed, \
            'Stash size %d (%d * %d) exceeds the limit %d (%d * %.2f)' % (
                max_stashed_blocks * self.tgt.blocksize, max_stashed_blocks,
                self.tgt.blocksize, max_allowed, cache_size,
                stash_threshold)

    if self.version >= 3:
      self.touched_src_sha1 = self.HashBlocks(
          self.src, self.touched_src_ranges)

    # Zero out extended blocks as a workaround for bug 20881595.
    if self.tgt.extended:
      out.append("zero %s\n" % (self.tgt.extended.to_string_raw(),))
      total += self.tgt.extended.size()

    # We erase all the blocks on the partition that a) don't contain useful
    # data in the new image; b) will not be touched by dm-verity. Out of those
    # blocks, we erase the ones that won't be used in this update at the
    # beginning of an update. The rest would be erased at the end. This is to
    # work around the eMMC issue observed on some devices, which may otherwise
    # get starving for clean blocks and thus fail the update. (b/28347095)
    all_tgt = RangeSet(data=(0, self.tgt.total_blocks))
    all_tgt_minus_extended = all_tgt.subtract(self.tgt.extended)
    new_dontcare = all_tgt_minus_extended.subtract(self.tgt.care_map)

    erase_first = new_dontcare.subtract(self.touched_src_ranges)
    if erase_first:
      out.insert(0, "erase %s\n" % (erase_first.to_string_raw(),))

    erase_last = new_dontcare.subtract(erase_first)
    if erase_last:
      out.append("erase %s\n" % (erase_last.to_string_raw(),))

    out.insert(0, "%d\n" % (self.version,))  # format version number
    out.insert(1, "%d\n" % (total,))
    if self.version >= 2:
      # version 2 only: after the total block count, we give the number
      # of stash slots needed, and the maximum size needed (in blocks)
      out.insert(2, str(next_stash_id) + "\n")
      out.insert(3, str(max_stashed_blocks) + "\n")

    with open(prefix + ".transfer.list", "wb") as f:
      for i in out:
        f.write(i)

    if self.version >= 2:
      self._max_stashed_size = max_stashed_blocks * self.tgt.blocksize
      OPTIONS = common.OPTIONS
      if OPTIONS.cache_size is not None:
        max_allowed = OPTIONS.cache_size * OPTIONS.stash_threshold
        print("max stashed blocks: %d (%d bytes), "
              "limit: %d bytes (%.2f%%)\n" % (
                  max_stashed_blocks, self._max_stashed_size, max_allowed,
                  self._max_stashed_size * 100.0 / max_allowed))
      else:
        print("max stashed blocks: %d (%d bytes), limit: <unknown>\n" % (
            max_stashed_blocks, self._max_stashed_size))
Doug Zongker62338182014-09-08 08:29:55 -0700602
  def ReviseStashSize(self):
    """Convert commands to "new" where stash usage would exceed the limit.

    Walks the transfers in execution order, simulating stash usage.
    Whenever a stash -- either an explicit stash_before, or the implicit
    stash a v3 "diff" needs when its source and target ranges overlap --
    would push usage past cache_size * stash_threshold, the command that
    consumes that stash is rewritten as a "new" command (which reads no
    source data and therefore needs no stash).
    """
    print("Revising stash size...")
    stashes = {}

    # Create the map between a stash and its def/use points. For example, for a
    # given stash of (idx, sr), stashes[idx] = (sr, def_cmd, use_cmd).
    for xf in self.transfers:
      # Command xf defines (stores) all the stashes in stash_before.
      for idx, sr in xf.stash_before:
        stashes[idx] = (sr, xf)

      # Record all the stashes command xf uses.
      for idx, _ in xf.use_stash:
        stashes[idx] += (xf,)

    # Compute the maximum blocks available for stash based on /cache size and
    # the threshold.
    cache_size = common.OPTIONS.cache_size
    stash_threshold = common.OPTIONS.stash_threshold
    max_allowed = cache_size * stash_threshold / self.tgt.blocksize

    stashed_blocks = 0
    new_blocks = 0

    # Now go through all the commands. Compute the required stash size on the
    # fly. If a command requires excess stash than available, it deletes the
    # stash by replacing the command that uses the stash with a "new" command
    # instead.
    for xf in self.transfers:
      replaced_cmds = []

      # xf.stash_before generates explicit stash commands.
      for idx, sr in xf.stash_before:
        if stashed_blocks + sr.size() > max_allowed:
          # We cannot stash this one for a later command. Find out the command
          # that will use this stash and replace the command with "new".
          use_cmd = stashes[idx][2]
          replaced_cmds.append(use_cmd)
          print("%10d %9s %s" % (sr.size(), "explicit", use_cmd))
        else:
          stashed_blocks += sr.size()

      # xf.use_stash generates free commands.
      for _, sr in xf.use_stash:
        stashed_blocks -= sr.size()

      # "move" and "diff" may introduce implicit stashes in BBOTA v3. Prior to
      # ComputePatches(), they both have the style of "diff".
      if xf.style == "diff" and self.version >= 3:
        assert xf.tgt_ranges and xf.src_ranges
        if xf.src_ranges.overlaps(xf.tgt_ranges):
          if stashed_blocks + xf.src_ranges.size() > max_allowed:
            replaced_cmds.append(xf)
            print("%10d %9s %s" % (xf.src_ranges.size(), "implicit", xf))

      # Replace the commands in replaced_cmds with "new"s.
      for cmd in replaced_cmds:
        # It no longer uses any commands in "use_stash". Remove the def points
        # for all those stashes.
        for idx, sr in cmd.use_stash:
          def_cmd = stashes[idx][1]
          assert (idx, sr) in def_cmd.stash_before
          def_cmd.stash_before.remove((idx, sr))

        # Add up blocks that violates space limit and print total number to
        # screen later.
        new_blocks += cmd.tgt_ranges.size()
        cmd.ConvertToNew()

    num_of_bytes = new_blocks * self.tgt.blocksize
    print(" Total %d blocks (%d bytes) are packed as new blocks due to "
          "insufficient cache size." % (new_blocks, num_of_bytes))
Tao Bao9a5caf22015-08-25 15:10:10 -0700675
  def ComputePatches(self, prefix):
    """Compute the data streams for all transfers and write them to disk.

    Writes the literal data for "new" transfers to <prefix>.new.dat and the
    binary patches for "diff" transfers to <prefix>.patch.dat, recording each
    patch's offset/length on the transfer (xf.patch_start, xf.patch_len).
    A "diff" transfer whose source and target contents turn out identical is
    downgraded to a "move"; otherwise it becomes "imgdiff" or "bsdiff".
    "zero" transfers produce no data.

    Args:
      prefix: path prefix for the two output files.
    """
    print("Reticulating splines...")
    # Queue of (tgt_size, src, tgt, xf, patch_num) tuples still to be diffed.
    diff_q = []
    patch_num = 0
    with open(prefix + ".new.dat", "wb") as new_f:
      for xf in self.transfers:
        if xf.style == "zero":
          pass
        elif xf.style == "new":
          # Raw target data, appended in transfer order.
          for piece in self.tgt.ReadRangeSet(xf.tgt_ranges):
            new_f.write(piece)
        elif xf.style == "diff":
          src = self.src.ReadRangeSet(xf.src_ranges)
          tgt = self.tgt.ReadRangeSet(xf.tgt_ranges)

          # We can't compare src and tgt directly because they may have
          # the same content but be broken up into blocks differently, eg:
          #
          # ["he", "llo"] vs ["h", "ello"]
          #
          # We want those to compare equal, ideally without having to
          # actually concatenate the strings (these may be tens of
          # megabytes).

          src_sha1 = sha1()
          for p in src:
            src_sha1.update(p)
          tgt_sha1 = sha1()
          tgt_size = 0
          for p in tgt:
            tgt_sha1.update(p)
            tgt_size += len(p)

          if src_sha1.digest() == tgt_sha1.digest():
            # These are identical; we don't need to generate a patch,
            # just issue copy commands on the device.
            xf.style = "move"
          else:
            # For files in zip format (eg, APKs, JARs, etc.) we would
            # like to use imgdiff -z if possible (because it usually
            # produces significantly smaller patches than bsdiff).
            # This is permissible if:
            #
            #  - imgdiff is not disabled, and
            #  - the source and target files are monotonic (ie, the
            #    data is stored with blocks in increasing order), and
            #  - we haven't removed any blocks from the source set.
            #
            # If these conditions are satisfied then appending all the
            # blocks in the set together in order will produce a valid
            # zip file (plus possibly extra zeros in the last block),
            # which is what imgdiff needs to operate. (imgdiff is
            # fine with extra zeros at the end of the file.)
            imgdiff = (not self.disable_imgdiff and xf.intact and
                       xf.tgt_name.split(".")[-1].lower()
                       in ("apk", "jar", "zip"))
            xf.style = "imgdiff" if imgdiff else "bsdiff"
            diff_q.append((tgt_size, src, tgt, xf, patch_num))
            patch_num += 1

        else:
          assert False, "unknown style " + xf.style

    if diff_q:
      if self.threads > 1:
        print("Computing patches (using %d threads)..." % (self.threads,))
      else:
        print("Computing patches...")
      # Sorted ascending by tgt_size; workers pop() from the end, so the
      # largest (slowest) diffs are started first.
      diff_q.sort()

      patches = [None] * patch_num

      # TODO: Rewrite with multiprocessing.ThreadPool?
      lock = threading.Lock()
      def diff_worker():
        # Each worker repeatedly takes one queue entry under the lock,
        # computes the patch outside the lock, then records the result.
        while True:
          with lock:
            if not diff_q:
              return
            tgt_size, src, tgt, xf, patchnum = diff_q.pop()
          patch = compute_patch(src, tgt, imgdiff=(xf.style == "imgdiff"))
          size = len(patch)
          with lock:
            patches[patchnum] = (patch, xf)
            print("%10d %10d (%6.2f%%) %7s %s" % (
                size, tgt_size, size * 100.0 / tgt_size, xf.style,
                xf.tgt_name if xf.tgt_name == xf.src_name else (
                    xf.tgt_name + " (from " + xf.src_name + ")")))

      threads = [threading.Thread(target=diff_worker)
                 for _ in range(self.threads)]
      for th in threads:
        th.start()
      while threads:
        threads.pop().join()
    else:
      patches = []

    # Concatenate all patches into <prefix>.patch.dat, recording each
    # transfer's offset/length within that stream.
    p = 0
    with open(prefix + ".patch.dat", "wb") as patch_f:
      for patch, xf in patches:
        xf.patch_start = p
        xf.patch_len = len(patch)
        patch_f.write(patch)
        p += len(patch)
781
782 def AssertSequenceGood(self):
783 # Simulate the sequences of transfers we will output, and check that:
784 # - we never read a block after writing it, and
785 # - we write every block we care about exactly once.
786
787 # Start with no blocks having been touched yet.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800788 touched = array.array("B", "\0" * self.tgt.total_blocks)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700789
790 # Imagine processing the transfers in order.
791 for xf in self.transfers:
792 # Check that the input blocks for this transfer haven't yet been touched.
Doug Zongker62338182014-09-08 08:29:55 -0700793
794 x = xf.src_ranges
795 if self.version >= 2:
796 for _, sr in xf.use_stash:
797 x = x.subtract(sr)
798
Doug Zongker6ab2a502016-02-09 08:28:09 -0800799 for s, e in x:
Tao Baoff75c232016-03-04 15:23:34 -0800800 # Source image could be larger. Don't check the blocks that are in the
801 # source image only. Since they are not in 'touched', and won't ever
802 # be touched.
803 for i in range(s, min(e, self.tgt.total_blocks)):
Doug Zongker6ab2a502016-02-09 08:28:09 -0800804 assert touched[i] == 0
805
806 # Check that the output blocks for this transfer haven't yet
807 # been touched, and touch all the blocks written by this
808 # transfer.
809 for s, e in xf.tgt_ranges:
810 for i in range(s, e):
811 assert touched[i] == 0
812 touched[i] = 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700813
814 # Check that we've written every target block.
Doug Zongker6ab2a502016-02-09 08:28:09 -0800815 for s, e in self.tgt.care_map:
816 for i in range(s, e):
817 assert touched[i] == 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700818
Doug Zongker62338182014-09-08 08:29:55 -0700819 def ImproveVertexSequence(self):
820 print("Improving vertex order...")
821
822 # At this point our digraph is acyclic; we reversed any edges that
823 # were backwards in the heuristically-generated sequence. The
824 # previously-generated order is still acceptable, but we hope to
825 # find a better order that needs less memory for stashed data.
826 # Now we do a topological sort to generate a new vertex order,
827 # using a greedy algorithm to choose which vertex goes next
828 # whenever we have a choice.
829
830 # Make a copy of the edge set; this copy will get destroyed by the
831 # algorithm.
832 for xf in self.transfers:
833 xf.incoming = xf.goes_after.copy()
834 xf.outgoing = xf.goes_before.copy()
835
836 L = [] # the new vertex order
837
838 # S is the set of sources in the remaining graph; we always choose
839 # the one that leaves the least amount of stashed data after it's
840 # executed.
841 S = [(u.NetStashChange(), u.order, u) for u in self.transfers
842 if not u.incoming]
843 heapq.heapify(S)
844
845 while S:
846 _, _, xf = heapq.heappop(S)
847 L.append(xf)
848 for u in xf.outgoing:
849 del u.incoming[xf]
850 if not u.incoming:
851 heapq.heappush(S, (u.NetStashChange(), u.order, u))
852
853 # if this fails then our graph had a cycle.
854 assert len(L) == len(self.transfers)
855
856 self.transfers = L
857 for i, xf in enumerate(L):
858 xf.order = i
859
Doug Zongkerfc44a512014-08-26 13:10:25 -0700860 def RemoveBackwardEdges(self):
861 print("Removing backward edges...")
862 in_order = 0
863 out_of_order = 0
864 lost_source = 0
865
866 for xf in self.transfers:
Doug Zongkerfc44a512014-08-26 13:10:25 -0700867 lost = 0
868 size = xf.src_ranges.size()
869 for u in xf.goes_before:
870 # xf should go before u
871 if xf.order < u.order:
872 # it does, hurray!
Doug Zongker62338182014-09-08 08:29:55 -0700873 in_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700874 else:
875 # it doesn't, boo. trim the blocks that u writes from xf's
876 # source, so that xf can go after u.
Doug Zongker62338182014-09-08 08:29:55 -0700877 out_of_order += 1
Doug Zongkerfc44a512014-08-26 13:10:25 -0700878 assert xf.src_ranges.overlaps(u.tgt_ranges)
879 xf.src_ranges = xf.src_ranges.subtract(u.tgt_ranges)
880 xf.intact = False
881
882 if xf.style == "diff" and not xf.src_ranges:
883 # nothing left to diff from; treat as new data
884 xf.style = "new"
885
886 lost = size - xf.src_ranges.size()
887 lost_source += lost
Doug Zongkerfc44a512014-08-26 13:10:25 -0700888
889 print((" %d/%d dependencies (%.2f%%) were violated; "
890 "%d source blocks removed.") %
891 (out_of_order, in_order + out_of_order,
892 (out_of_order * 100.0 / (in_order + out_of_order))
893 if (in_order + out_of_order) else 0.0,
894 lost_source))
895
Doug Zongker62338182014-09-08 08:29:55 -0700896 def ReverseBackwardEdges(self):
897 print("Reversing backward edges...")
898 in_order = 0
899 out_of_order = 0
900 stashes = 0
901 stash_size = 0
902
903 for xf in self.transfers:
Doug Zongker62338182014-09-08 08:29:55 -0700904 for u in xf.goes_before.copy():
905 # xf should go before u
906 if xf.order < u.order:
907 # it does, hurray!
908 in_order += 1
909 else:
910 # it doesn't, boo. modify u to stash the blocks that it
911 # writes that xf wants to read, and then require u to go
912 # before xf.
913 out_of_order += 1
914
915 overlap = xf.src_ranges.intersect(u.tgt_ranges)
916 assert overlap
917
918 u.stash_before.append((stashes, overlap))
919 xf.use_stash.append((stashes, overlap))
920 stashes += 1
921 stash_size += overlap.size()
922
923 # reverse the edge direction; now xf must go after u
924 del xf.goes_before[u]
925 del u.goes_after[xf]
926 xf.goes_after[u] = None # value doesn't matter
927 u.goes_before[xf] = None
928
929 print((" %d/%d dependencies (%.2f%%) were violated; "
930 "%d source blocks stashed.") %
931 (out_of_order, in_order + out_of_order,
932 (out_of_order * 100.0 / (in_order + out_of_order))
933 if (in_order + out_of_order) else 0.0,
934 stash_size))
935
  def FindVertexSequence(self):
    """Order the transfers to minimize the weight of backward edges.

    Computes a linear order of self.transfers (recorded both in each
    transfer's 'order' field and in the list order of self.transfers) that
    heuristically minimizes the total weight of left-pointing edges, i.e.
    the number of source blocks that later passes will have to drop or
    stash. Consumes the goes_before/goes_after copies made below.
    """
    print("Finding vertex sequence...")

    # This is based on "A Fast & Effective Heuristic for the Feedback
    # Arc Set Problem" by P. Eades, X. Lin, and W.F. Smyth. Think of
    # it as starting with the digraph G and moving all the vertices to
    # be on a horizontal line in some order, trying to minimize the
    # number of edges that end up pointing to the left. Left-pointing
    # edges will get removed to turn the digraph into a DAG. In this
    # case each edge has a weight which is the number of source blocks
    # we'll lose if that edge is removed; we try to minimize the total
    # weight rather than just the number of edges.

    # Make a copy of the edge set; this copy will get destroyed by the
    # algorithm.
    for xf in self.transfers:
      xf.incoming = xf.goes_after.copy()
      xf.outgoing = xf.goes_before.copy()
      # score = (weight of outgoing edges) - (weight of incoming edges);
      # higher means the vertex behaves more like a source.
      xf.score = sum(xf.outgoing.values()) - sum(xf.incoming.values())

    # We use an OrderedDict instead of just a set so that the output
    # is repeatable; otherwise it would depend on the hash values of
    # the transfer objects.
    G = OrderedDict()
    for xf in self.transfers:
      G[xf] = None
    s1 = deque()  # the left side of the sequence, built from left to right
    s2 = deque()  # the right side of the sequence, built from right to left

    # Max-heap of HeapItem wrappers keyed on score. Entries are lazily
    # invalidated: adjust_score() clear()s the stale wrapper and pushes a
    # fresh one, and the pop loop below skips cleared/removed entries.
    heap = []
    for xf in self.transfers:
      xf.heap_item = HeapItem(xf)
      heap.append(xf.heap_item)
    heapq.heapify(heap)

    sinks = set(u for u in G if not u.outgoing)
    sources = set(u for u in G if not u.incoming)

    def adjust_score(iu, delta):
      # Update iu's score and replace its heap entry (old one is cleared,
      # not removed, so popping must tolerate dead entries).
      iu.score += delta
      iu.heap_item.clear()
      iu.heap_item = HeapItem(iu)
      heapq.heappush(heap, iu.heap_item)

    while G:
      # Put all sinks at the end of the sequence.
      while sinks:
        new_sinks = set()
        for u in sinks:
          if u not in G: continue
          s2.appendleft(u)
          del G[u]
          for iu in u.incoming:
            adjust_score(iu, -iu.outgoing.pop(u))
            if not iu.outgoing: new_sinks.add(iu)
        sinks = new_sinks

      # Put all the sources at the beginning of the sequence.
      while sources:
        new_sources = set()
        for u in sources:
          if u not in G: continue
          s1.append(u)
          del G[u]
          for iu in u.outgoing:
            adjust_score(iu, +iu.incoming.pop(u))
            if not iu.incoming: new_sources.add(iu)
        sources = new_sources

      if not G: break

      # Find the "best" vertex to put next. "Best" is the one that
      # maximizes the net difference in source blocks saved we get by
      # pretending it's a source rather than a sink.

      while True:
        u = heapq.heappop(heap)
        # Skip cleared (falsy) heap entries and vertices already removed
        # from G; NOTE(review): relies on HeapItem being falsy once
        # clear()ed — confirm against the HeapItem definition.
        if u and u.item in G:
          u = u.item
          break

      s1.append(u)
      del G[u]
      for iu in u.outgoing:
        adjust_score(iu, +iu.incoming.pop(u))
        if not iu.incoming: sources.add(iu)

      for iu in u.incoming:
        adjust_score(iu, -iu.outgoing.pop(u))
        if not iu.outgoing: sinks.add(iu)

    # Now record the sequence in the 'order' field of each transfer,
    # and by rearranging self.transfers to be in the chosen sequence.

    new_transfers = []
    for x in itertools.chain(s1, s2):
      x.order = len(new_transfers)
      new_transfers.append(x)
      del x.incoming
      del x.outgoing

    self.transfers = new_transfers
1038
1039 def GenerateDigraph(self):
1040 print("Generating digraph...")
Doug Zongker6ab2a502016-02-09 08:28:09 -08001041
1042 # Each item of source_ranges will be:
1043 # - None, if that block is not used as a source,
1044 # - a transfer, if one transfer uses it as a source, or
1045 # - a set of transfers.
1046 source_ranges = []
1047 for b in self.transfers:
1048 for s, e in b.src_ranges:
1049 if e > len(source_ranges):
1050 source_ranges.extend([None] * (e-len(source_ranges)))
1051 for i in range(s, e):
1052 if source_ranges[i] is None:
1053 source_ranges[i] = b
1054 else:
1055 if not isinstance(source_ranges[i], set):
1056 source_ranges[i] = set([source_ranges[i]])
1057 source_ranges[i].add(b)
1058
Doug Zongkerfc44a512014-08-26 13:10:25 -07001059 for a in self.transfers:
Doug Zongker6ab2a502016-02-09 08:28:09 -08001060 intersections = set()
1061 for s, e in a.tgt_ranges:
1062 for i in range(s, e):
1063 if i >= len(source_ranges): break
1064 b = source_ranges[i]
1065 if b is not None:
1066 if isinstance(b, set):
1067 intersections.update(b)
1068 else:
1069 intersections.add(b)
1070
1071 for b in intersections:
1072 if a is b: continue
Doug Zongkerfc44a512014-08-26 13:10:25 -07001073
1074 # If the blocks written by A are read by B, then B needs to go before A.
1075 i = a.tgt_ranges.intersect(b.src_ranges)
1076 if i:
Doug Zongkerab7ca1d2014-08-26 10:40:28 -07001077 if b.src_name == "__ZERO":
1078 # the cost of removing source blocks for the __ZERO domain
1079 # is (nearly) zero.
1080 size = 0
1081 else:
1082 size = i.size()
Doug Zongkerfc44a512014-08-26 13:10:25 -07001083 b.goes_before[a] = size
1084 a.goes_after[b] = size
1085
1086 def FindTransfers(self):
Tao Bao9a5caf22015-08-25 15:10:10 -07001087 """Parse the file_map to generate all the transfers."""
1088
1089 def AddTransfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id,
1090 split=False):
1091 """Wrapper function for adding a Transfer().
1092
1093 For BBOTA v3, we need to stash source blocks for resumable feature.
1094 However, with the growth of file size and the shrink of the cache
1095 partition source blocks are too large to be stashed. If a file occupies
1096 too many blocks (greater than MAX_BLOCKS_PER_DIFF_TRANSFER), we split it
1097 into smaller pieces by getting multiple Transfer()s.
1098
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001099 The downside is that after splitting, we may increase the package size
1100 since the split pieces don't align well. According to our experiments,
1101 1/8 of the cache size as the per-piece limit appears to be optimal.
1102 Compared to the fixed 1024-block limit, it reduces the overall package
1103 size by 30% volantis, and 20% for angler and bullhead."""
Tao Bao9a5caf22015-08-25 15:10:10 -07001104
1105 # We care about diff transfers only.
1106 if style != "diff" or not split:
1107 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1108 return
1109
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001110 pieces = 0
1111 cache_size = common.OPTIONS.cache_size
1112 split_threshold = 0.125
1113 max_blocks_per_transfer = int(cache_size * split_threshold /
1114 self.tgt.blocksize)
1115
Tao Bao9a5caf22015-08-25 15:10:10 -07001116 # Change nothing for small files.
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001117 if (tgt_ranges.size() <= max_blocks_per_transfer and
1118 src_ranges.size() <= max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001119 Transfer(tgt_name, src_name, tgt_ranges, src_ranges, style, by_id)
1120 return
1121
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001122 while (tgt_ranges.size() > max_blocks_per_transfer and
1123 src_ranges.size() > max_blocks_per_transfer):
Tao Bao9a5caf22015-08-25 15:10:10 -07001124 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1125 src_split_name = "%s-%d" % (src_name, pieces)
Tianjie Xubb86e1d2016-01-13 16:14:10 -08001126 tgt_first = tgt_ranges.first(max_blocks_per_transfer)
1127 src_first = src_ranges.first(max_blocks_per_transfer)
1128
Tao Bao9a5caf22015-08-25 15:10:10 -07001129 Transfer(tgt_split_name, src_split_name, tgt_first, src_first, style,
1130 by_id)
1131
1132 tgt_ranges = tgt_ranges.subtract(tgt_first)
1133 src_ranges = src_ranges.subtract(src_first)
1134 pieces += 1
1135
1136 # Handle remaining blocks.
1137 if tgt_ranges.size() or src_ranges.size():
1138 # Must be both non-empty.
1139 assert tgt_ranges.size() and src_ranges.size()
1140 tgt_split_name = "%s-%d" % (tgt_name, pieces)
1141 src_split_name = "%s-%d" % (src_name, pieces)
1142 Transfer(tgt_split_name, src_split_name, tgt_ranges, src_ranges, style,
1143 by_id)
1144
Doug Zongkerfc44a512014-08-26 13:10:25 -07001145 empty = RangeSet()
1146 for tgt_fn, tgt_ranges in self.tgt.file_map.items():
1147 if tgt_fn == "__ZERO":
1148 # the special "__ZERO" domain is all the blocks not contained
1149 # in any file and that are filled with zeros. We have a
1150 # special transfer style for zero blocks.
1151 src_ranges = self.src.file_map.get("__ZERO", empty)
Tao Bao9a5caf22015-08-25 15:10:10 -07001152 AddTransfer(tgt_fn, "__ZERO", tgt_ranges, src_ranges,
1153 "zero", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001154 continue
1155
Tao Baoff777812015-05-12 11:42:31 -07001156 elif tgt_fn == "__COPY":
1157 # "__COPY" domain includes all the blocks not contained in any
1158 # file and that need to be copied unconditionally to the target.
Tao Bao9a5caf22015-08-25 15:10:10 -07001159 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Tao Baoff777812015-05-12 11:42:31 -07001160 continue
1161
Doug Zongkerfc44a512014-08-26 13:10:25 -07001162 elif tgt_fn in self.src.file_map:
1163 # Look for an exact pathname match in the source.
Tao Bao9a5caf22015-08-25 15:10:10 -07001164 AddTransfer(tgt_fn, tgt_fn, tgt_ranges, self.src.file_map[tgt_fn],
1165 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001166 continue
1167
1168 b = os.path.basename(tgt_fn)
1169 if b in self.src_basenames:
1170 # Look for an exact basename match in the source.
1171 src_fn = self.src_basenames[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001172 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1173 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001174 continue
1175
1176 b = re.sub("[0-9]+", "#", b)
1177 if b in self.src_numpatterns:
1178 # Look for a 'number pattern' match (a basename match after
1179 # all runs of digits are replaced by "#"). (This is useful
1180 # for .so files that contain version numbers in the filename
1181 # that get bumped.)
1182 src_fn = self.src_numpatterns[b]
Tao Bao9a5caf22015-08-25 15:10:10 -07001183 AddTransfer(tgt_fn, src_fn, tgt_ranges, self.src.file_map[src_fn],
1184 "diff", self.transfers, self.version >= 3)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001185 continue
1186
Tao Bao9a5caf22015-08-25 15:10:10 -07001187 AddTransfer(tgt_fn, None, tgt_ranges, empty, "new", self.transfers)
Doug Zongkerfc44a512014-08-26 13:10:25 -07001188
1189 def AbbreviateSourceNames(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -07001190 for k in self.src.file_map.keys():
1191 b = os.path.basename(k)
1192 self.src_basenames[b] = k
1193 b = re.sub("[0-9]+", "#", b)
1194 self.src_numpatterns[b] = k
1195
1196 @staticmethod
1197 def AssertPartition(total, seq):
1198 """Assert that all the RangeSets in 'seq' form a partition of the
1199 'total' RangeSet (ie, they are nonintersecting and their union
1200 equals 'total')."""
Doug Zongker6ab2a502016-02-09 08:28:09 -08001201
Doug Zongkerfc44a512014-08-26 13:10:25 -07001202 so_far = RangeSet()
1203 for i in seq:
1204 assert not so_far.overlaps(i)
1205 so_far = so_far.union(i)
1206 assert so_far == total