blob: 8af61c35556b5bceb8906efafad0a11ea15b77c6 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22 """A RangeSet represents a set of nonoverlapping ranges on the
23 integers (ie, a set of integers, but efficient when the set contains
24 lots of runs."""
25
26 def __init__(self, data=None):
Dan Albert8b72aef2015-03-23 19:13:21 -070027 self.monotonic = False
Tao Baoc765cca2018-01-31 17:32:40 -080028 self._extra = {}
Doug Zongker846cb3a2014-09-09 10:55:36 -070029 if isinstance(data, str):
30 self._parse_internal(data)
31 elif data:
Tao Baoe8f75612015-08-26 17:07:14 -070032 assert len(data) % 2 == 0
Doug Zongkerfc44a512014-08-26 13:10:25 -070033 self.data = tuple(self._remove_pairs(data))
Tao Baoe8f75612015-08-26 17:07:14 -070034 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
Doug Zongkerfc44a512014-08-26 13:10:25 -070035 else:
36 self.data = ()
37
38 def __iter__(self):
39 for i in range(0, len(self.data), 2):
40 yield self.data[i:i+2]
41
42 def __eq__(self, other):
43 return self.data == other.data
Tao Baoe8f75612015-08-26 17:07:14 -070044
Doug Zongkerfc44a512014-08-26 13:10:25 -070045 def __ne__(self, other):
46 return self.data != other.data
Tao Baoe8f75612015-08-26 17:07:14 -070047
Doug Zongkerfc44a512014-08-26 13:10:25 -070048 def __nonzero__(self):
49 return bool(self.data)
50
51 def __str__(self):
52 if not self.data:
53 return "empty"
54 else:
55 return self.to_string()
56
Doug Zongker846cb3a2014-09-09 10:55:36 -070057 def __repr__(self):
58 return '<RangeSet("' + self.to_string() + '")>'
59
Tao Baoc765cca2018-01-31 17:32:40 -080060 @property
61 def extra(self):
62 return self._extra
63
Doug Zongkerfc44a512014-08-26 13:10:25 -070064 @classmethod
65 def parse(cls, text):
66 """Parse a text string consisting of a space-separated list of
67 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to
68 include both their ends (so the above example represents 18
69 individual blocks. Returns a RangeSet object.
70
71 If the input has all its blocks in increasing order, then returned
72 RangeSet will have an extra attribute 'monotonic' that is set to
73 True. For example the input "10-20 30" is monotonic, but the input
74 "15-20 30 10-14" is not, even though they represent the same set
75 of blocks (and the two RangeSets will compare equal with ==).
76 """
Doug Zongker846cb3a2014-09-09 10:55:36 -070077 return cls(text)
Doug Zongkerfc44a512014-08-26 13:10:25 -070078
Tao Bao8179d682016-03-24 11:08:51 -070079 @classmethod
80 def parse_raw(cls, text):
81 """Parse a string generated by RangeSet.to_string_raw().
82
83 >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw())
84 <RangeSet("0-9")>
85 """
86
87 raw = [int(i) for i in text.split(',')]
88 assert raw[0] == len(raw[1:]), "Invalid raw string."
89
90 return cls(data=raw[1:])
91
Doug Zongker846cb3a2014-09-09 10:55:36 -070092 def _parse_internal(self, text):
Doug Zongkerfc44a512014-08-26 13:10:25 -070093 data = []
94 last = -1
95 monotonic = True
96 for p in text.split():
97 if "-" in p:
Tao Baoe8f75612015-08-26 17:07:14 -070098 s, e = (int(x) for x in p.split("-"))
99 data.append(s)
100 data.append(e+1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700101 if last <= s <= e:
102 last = e
103 else:
104 monotonic = False
105 else:
106 s = int(p)
107 data.append(s)
108 data.append(s+1)
109 if last <= s:
110 last = s+1
111 else:
Tianjie Xucd1e16a2016-04-07 20:17:48 -0700112 monotonic = False
Doug Zongkerfc44a512014-08-26 13:10:25 -0700113 data.sort()
Doug Zongker846cb3a2014-09-09 10:55:36 -0700114 self.data = tuple(self._remove_pairs(data))
115 self.monotonic = monotonic
Doug Zongkerfc44a512014-08-26 13:10:25 -0700116
117 @staticmethod
118 def _remove_pairs(source):
Tao Baoe8f75612015-08-26 17:07:14 -0700119 """Remove consecutive duplicate items to simplify the result.
120
121 [1, 2, 2, 5, 5, 10] will become [1, 10]."""
Doug Zongkerfc44a512014-08-26 13:10:25 -0700122 last = None
123 for i in source:
124 if i == last:
125 last = None
126 else:
127 if last is not None:
128 yield last
129 last = i
130 if last is not None:
131 yield last
132
133 def to_string(self):
134 out = []
135 for i in range(0, len(self.data), 2):
136 s, e = self.data[i:i+2]
137 if e == s+1:
138 out.append(str(s))
139 else:
140 out.append(str(s) + "-" + str(e-1))
141 return " ".join(out)
142
143 def to_string_raw(self):
Tao Baoe8f75612015-08-26 17:07:14 -0700144 assert self.data
Doug Zongkerfc44a512014-08-26 13:10:25 -0700145 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
146
147 def union(self, other):
148 """Return a new RangeSet representing the union of this RangeSet
Doug Zongker846cb3a2014-09-09 10:55:36 -0700149 with the argument.
150
151 >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
152 <RangeSet("10-34")>
153 >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
154 <RangeSet("10-19 22 30-34")>
155 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700156 out = []
157 z = 0
158 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
159 zip(other.data, itertools.cycle((+1, -1)))):
160 if (z == 0 and d == 1) or (z == 1 and d == -1):
161 out.append(p)
162 z += d
163 return RangeSet(data=out)
164
165 def intersect(self, other):
166 """Return a new RangeSet representing the intersection of this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700167 RangeSet with the argument.
168
169 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
170 <RangeSet("18-19 30-32")>
171 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
172 <RangeSet("")>
173 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700174 out = []
175 z = 0
176 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
177 zip(other.data, itertools.cycle((+1, -1)))):
178 if (z == 1 and d == 1) or (z == 2 and d == -1):
179 out.append(p)
180 z += d
181 return RangeSet(data=out)
182
183 def subtract(self, other):
184 """Return a new RangeSet representing subtracting the argument
Doug Zongker846cb3a2014-09-09 10:55:36 -0700185 from this RangeSet.
186
187 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
188 <RangeSet("10-17 33-34")>
189 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
190 <RangeSet("10-19 30-34")>
191 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700192
193 out = []
194 z = 0
195 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
196 zip(other.data, itertools.cycle((-1, +1)))):
197 if (z == 0 and d == 1) or (z == 1 and d == -1):
198 out.append(p)
199 z += d
200 return RangeSet(data=out)
201
202 def overlaps(self, other):
203 """Returns true if the argument has a nonempty overlap with this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700204 RangeSet.
205
206 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
207 True
208 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
209 False
210 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700211
212 # This is like intersect, but we can stop as soon as we discover the
213 # output is going to be nonempty.
214 z = 0
Dan Albert8b72aef2015-03-23 19:13:21 -0700215 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
Doug Zongkerfc44a512014-08-26 13:10:25 -0700216 zip(other.data, itertools.cycle((+1, -1)))):
217 if (z == 1 and d == 1) or (z == 2 and d == -1):
218 return True
219 z += d
220 return False
221
222 def size(self):
223 """Returns the total size of the RangeSet (ie, how many integers
Doug Zongker846cb3a2014-09-09 10:55:36 -0700224 are in the set).
225
226 >>> RangeSet("10-19 30-34").size()
227 15
228 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700229
230 total = 0
231 for i, p in enumerate(self.data):
232 if i % 2:
233 total += p
234 else:
235 total -= p
236 return total
Doug Zongker62338182014-09-08 08:29:55 -0700237
238 def map_within(self, other):
239 """'other' should be a subset of 'self'. Returns a RangeSet
240 representing what 'other' would get translated to if the integers
241 of 'self' were translated down to be contiguous starting at zero.
242
Doug Zongker846cb3a2014-09-09 10:55:36 -0700243 >>> RangeSet("0-9").map_within(RangeSet("3-4"))
244 <RangeSet("3-4")>
245 >>> RangeSet("10-19").map_within(RangeSet("13-14"))
246 <RangeSet("3-4")>
247 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
248 <RangeSet("7-12")>
249 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
250 <RangeSet("2-3 7-12")>
Doug Zongker62338182014-09-08 08:29:55 -0700251 """
252
253 out = []
254 offset = 0
255 start = None
256 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
257 zip(other.data, itertools.cycle((-1, +1)))):
258 if d == -5:
259 start = p
260 elif d == +5:
261 offset += p-start
262 start = None
263 else:
264 out.append(offset + p - start)
265 return RangeSet(data=out)
266
Tao Baoe9b61912015-07-09 17:37:49 -0700267 def extend(self, n):
268 """Extend the RangeSet by 'n' blocks.
269
270 The lower bound is guaranteed to be non-negative.
271
272 >>> RangeSet("0-9").extend(1)
273 <RangeSet("0-10")>
274 >>> RangeSet("10-19").extend(15)
275 <RangeSet("0-34")>
276 >>> RangeSet("10-19 30-39").extend(4)
277 <RangeSet("6-23 26-43")>
278 >>> RangeSet("10-19 30-39").extend(10)
279 <RangeSet("0-49")>
280 """
281 out = self
282 for i in range(0, len(self.data), 2):
283 s, e = self.data[i:i+2]
284 s1 = max(0, s - n)
285 e1 = e + n
286 out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
287 return out
288
Tao Bao9a5caf22015-08-25 15:10:10 -0700289 def first(self, n):
290 """Return the RangeSet that contains at most the first 'n' integers.
291
292 >>> RangeSet("0-9").first(1)
293 <RangeSet("0")>
294 >>> RangeSet("10-19").first(5)
295 <RangeSet("10-14")>
296 >>> RangeSet("10-19").first(15)
297 <RangeSet("10-19")>
298 >>> RangeSet("10-19 30-39").first(3)
299 <RangeSet("10-12")>
300 >>> RangeSet("10-19 30-39").first(15)
301 <RangeSet("10-19 30-34")>
302 >>> RangeSet("10-19 30-39").first(30)
303 <RangeSet("10-19 30-39")>
304 >>> RangeSet("0-9").first(0)
305 <RangeSet("")>
306 """
307
308 if self.size() <= n:
309 return self
310
311 out = []
312 for s, e in self:
313 if e - s >= n:
314 out += (s, s+n)
315 break
316 else:
317 out += (s, e)
318 n -= e - s
319 return RangeSet(data=out)
320
Tao Bao08c85832016-09-19 22:26:30 -0700321 def next_item(self):
322 """Return the next integer represented by the RangeSet.
323
324 >>> list(RangeSet("0-9").next_item())
325 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
326 >>> list(RangeSet("10-19 3-5").next_item())
327 [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Tao Baoc0dcbd02017-11-02 12:19:36 -0700328 >>> list(RangeSet("10-19 3 5 7").next_item())
Tao Bao08c85832016-09-19 22:26:30 -0700329 [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
330 """
331 for s, e in self:
332 for element in range(s, e):
333 yield element
334
Doug Zongker62338182014-09-08 08:29:55 -0700335
336if __name__ == "__main__":
337 import doctest
338 doctest.testmod()