blob: 9d6e9fb17653236718077a0505860c5436b627e7 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
Tao Baofe97dbd2018-02-06 15:01:54 -080016
Doug Zongkerfc44a512014-08-26 13:10:25 -070017import heapq
18import itertools
19
Tao Baofe97dbd2018-02-06 15:01:54 -080020
Doug Zongkerfc44a512014-08-26 13:10:25 -070021__all__ = ["RangeSet"]
22
Tao Baofe97dbd2018-02-06 15:01:54 -080023
Doug Zongkerfc44a512014-08-26 13:10:25 -070024class RangeSet(object):
Tao Baofe97dbd2018-02-06 15:01:54 -080025 """A RangeSet represents a set of non-overlapping ranges on integers.
26
27 Attributes:
28 monotonic: Whether the input has all its integers in increasing order.
29 extra: A dict that can be used by the caller, e.g. to store info that's
30 only meaningful to caller.
31 """
Doug Zongkerfc44a512014-08-26 13:10:25 -070032
33 def __init__(self, data=None):
Dan Albert8b72aef2015-03-23 19:13:21 -070034 self.monotonic = False
Tao Baoc765cca2018-01-31 17:32:40 -080035 self._extra = {}
Doug Zongker846cb3a2014-09-09 10:55:36 -070036 if isinstance(data, str):
37 self._parse_internal(data)
38 elif data:
Tao Baoe8f75612015-08-26 17:07:14 -070039 assert len(data) % 2 == 0
Doug Zongkerfc44a512014-08-26 13:10:25 -070040 self.data = tuple(self._remove_pairs(data))
Tao Baoe8f75612015-08-26 17:07:14 -070041 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
Doug Zongkerfc44a512014-08-26 13:10:25 -070042 else:
43 self.data = ()
44
45 def __iter__(self):
46 for i in range(0, len(self.data), 2):
47 yield self.data[i:i+2]
48
49 def __eq__(self, other):
50 return self.data == other.data
Tao Baoe8f75612015-08-26 17:07:14 -070051
Doug Zongkerfc44a512014-08-26 13:10:25 -070052 def __ne__(self, other):
53 return self.data != other.data
Tao Baoe8f75612015-08-26 17:07:14 -070054
Tao Baod660c8d2019-06-19 10:17:21 -070055 def __bool__(self):
Doug Zongkerfc44a512014-08-26 13:10:25 -070056 return bool(self.data)
57
Tao Baod660c8d2019-06-19 10:17:21 -070058 # Python 2 uses __nonzero__, while Python 3 uses __bool__.
59 __nonzero__ = __bool__
60
Doug Zongkerfc44a512014-08-26 13:10:25 -070061 def __str__(self):
62 if not self.data:
63 return "empty"
64 else:
65 return self.to_string()
66
Doug Zongker846cb3a2014-09-09 10:55:36 -070067 def __repr__(self):
68 return '<RangeSet("' + self.to_string() + '")>'
69
Tao Baoc765cca2018-01-31 17:32:40 -080070 @property
71 def extra(self):
72 return self._extra
73
Doug Zongkerfc44a512014-08-26 13:10:25 -070074 @classmethod
75 def parse(cls, text):
Tao Baofe97dbd2018-02-06 15:01:54 -080076 """Parses a text string into a RangeSet.
Doug Zongkerfc44a512014-08-26 13:10:25 -070077
Tao Baofe97dbd2018-02-06 15:01:54 -080078 The input text string consists of a space-separated list of blocks and
79 ranges, e.g. "10-20 30 35-40". Ranges are interpreted to include both their
80 ends (so the above example represents 18 individual blocks). Returns a
81 RangeSet object.
82
83 If the input has all its blocks in increasing order, then the 'monotonic'
84 attribute of the returned RangeSet will be set to True. For example the
85 input "10-20 30" is monotonic, but the input "15-20 30 10-14" is not, even
86 though they represent the same set of blocks (and the two RangeSets will
87 compare equal with ==).
Doug Zongkerfc44a512014-08-26 13:10:25 -070088 """
Doug Zongker846cb3a2014-09-09 10:55:36 -070089 return cls(text)
Doug Zongkerfc44a512014-08-26 13:10:25 -070090
Tao Bao8179d682016-03-24 11:08:51 -070091 @classmethod
92 def parse_raw(cls, text):
93 """Parse a string generated by RangeSet.to_string_raw().
94
95 >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw())
96 <RangeSet("0-9")>
97 """
98
99 raw = [int(i) for i in text.split(',')]
100 assert raw[0] == len(raw[1:]), "Invalid raw string."
101
102 return cls(data=raw[1:])
103
Doug Zongker846cb3a2014-09-09 10:55:36 -0700104 def _parse_internal(self, text):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700105 data = []
106 last = -1
107 monotonic = True
108 for p in text.split():
109 if "-" in p:
Tao Baoe8f75612015-08-26 17:07:14 -0700110 s, e = (int(x) for x in p.split("-"))
111 data.append(s)
112 data.append(e+1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700113 if last <= s <= e:
114 last = e
115 else:
116 monotonic = False
117 else:
118 s = int(p)
119 data.append(s)
120 data.append(s+1)
121 if last <= s:
122 last = s+1
123 else:
Tianjie Xucd1e16a2016-04-07 20:17:48 -0700124 monotonic = False
Doug Zongkerfc44a512014-08-26 13:10:25 -0700125 data.sort()
Doug Zongker846cb3a2014-09-09 10:55:36 -0700126 self.data = tuple(self._remove_pairs(data))
127 self.monotonic = monotonic
Doug Zongkerfc44a512014-08-26 13:10:25 -0700128
129 @staticmethod
130 def _remove_pairs(source):
Tao Baoe8f75612015-08-26 17:07:14 -0700131 """Remove consecutive duplicate items to simplify the result.
132
133 [1, 2, 2, 5, 5, 10] will become [1, 10]."""
Doug Zongkerfc44a512014-08-26 13:10:25 -0700134 last = None
135 for i in source:
136 if i == last:
137 last = None
138 else:
139 if last is not None:
140 yield last
141 last = i
142 if last is not None:
143 yield last
144
145 def to_string(self):
146 out = []
147 for i in range(0, len(self.data), 2):
148 s, e = self.data[i:i+2]
149 if e == s+1:
150 out.append(str(s))
151 else:
152 out.append(str(s) + "-" + str(e-1))
153 return " ".join(out)
154
155 def to_string_raw(self):
Tao Baoe8f75612015-08-26 17:07:14 -0700156 assert self.data
Doug Zongkerfc44a512014-08-26 13:10:25 -0700157 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
158
159 def union(self, other):
160 """Return a new RangeSet representing the union of this RangeSet
Doug Zongker846cb3a2014-09-09 10:55:36 -0700161 with the argument.
162
163 >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
164 <RangeSet("10-34")>
165 >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
166 <RangeSet("10-19 22 30-34")>
167 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700168 out = []
169 z = 0
170 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
171 zip(other.data, itertools.cycle((+1, -1)))):
172 if (z == 0 and d == 1) or (z == 1 and d == -1):
173 out.append(p)
174 z += d
175 return RangeSet(data=out)
176
177 def intersect(self, other):
178 """Return a new RangeSet representing the intersection of this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700179 RangeSet with the argument.
180
181 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
182 <RangeSet("18-19 30-32")>
183 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
184 <RangeSet("")>
185 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700186 out = []
187 z = 0
188 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
189 zip(other.data, itertools.cycle((+1, -1)))):
190 if (z == 1 and d == 1) or (z == 2 and d == -1):
191 out.append(p)
192 z += d
193 return RangeSet(data=out)
194
195 def subtract(self, other):
196 """Return a new RangeSet representing subtracting the argument
Doug Zongker846cb3a2014-09-09 10:55:36 -0700197 from this RangeSet.
198
199 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
200 <RangeSet("10-17 33-34")>
201 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
202 <RangeSet("10-19 30-34")>
203 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700204
205 out = []
206 z = 0
207 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
208 zip(other.data, itertools.cycle((-1, +1)))):
209 if (z == 0 and d == 1) or (z == 1 and d == -1):
210 out.append(p)
211 z += d
212 return RangeSet(data=out)
213
214 def overlaps(self, other):
215 """Returns true if the argument has a nonempty overlap with this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700216 RangeSet.
217
218 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
219 True
220 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
221 False
222 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700223
224 # This is like intersect, but we can stop as soon as we discover the
225 # output is going to be nonempty.
226 z = 0
Dan Albert8b72aef2015-03-23 19:13:21 -0700227 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
Doug Zongkerfc44a512014-08-26 13:10:25 -0700228 zip(other.data, itertools.cycle((+1, -1)))):
229 if (z == 1 and d == 1) or (z == 2 and d == -1):
230 return True
231 z += d
232 return False
233
234 def size(self):
235 """Returns the total size of the RangeSet (ie, how many integers
Doug Zongker846cb3a2014-09-09 10:55:36 -0700236 are in the set).
237
238 >>> RangeSet("10-19 30-34").size()
239 15
240 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700241
242 total = 0
243 for i, p in enumerate(self.data):
244 if i % 2:
245 total += p
246 else:
247 total -= p
248 return total
Doug Zongker62338182014-09-08 08:29:55 -0700249
250 def map_within(self, other):
251 """'other' should be a subset of 'self'. Returns a RangeSet
252 representing what 'other' would get translated to if the integers
253 of 'self' were translated down to be contiguous starting at zero.
254
Doug Zongker846cb3a2014-09-09 10:55:36 -0700255 >>> RangeSet("0-9").map_within(RangeSet("3-4"))
256 <RangeSet("3-4")>
257 >>> RangeSet("10-19").map_within(RangeSet("13-14"))
258 <RangeSet("3-4")>
259 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
260 <RangeSet("7-12")>
261 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
262 <RangeSet("2-3 7-12")>
Doug Zongker62338182014-09-08 08:29:55 -0700263 """
264
265 out = []
266 offset = 0
267 start = None
268 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
269 zip(other.data, itertools.cycle((-1, +1)))):
270 if d == -5:
271 start = p
272 elif d == +5:
273 offset += p-start
274 start = None
275 else:
276 out.append(offset + p - start)
277 return RangeSet(data=out)
278
Tao Baoe9b61912015-07-09 17:37:49 -0700279 def extend(self, n):
280 """Extend the RangeSet by 'n' blocks.
281
282 The lower bound is guaranteed to be non-negative.
283
284 >>> RangeSet("0-9").extend(1)
285 <RangeSet("0-10")>
286 >>> RangeSet("10-19").extend(15)
287 <RangeSet("0-34")>
288 >>> RangeSet("10-19 30-39").extend(4)
289 <RangeSet("6-23 26-43")>
290 >>> RangeSet("10-19 30-39").extend(10)
291 <RangeSet("0-49")>
292 """
293 out = self
294 for i in range(0, len(self.data), 2):
295 s, e = self.data[i:i+2]
296 s1 = max(0, s - n)
297 e1 = e + n
298 out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
299 return out
300
Tao Bao9a5caf22015-08-25 15:10:10 -0700301 def first(self, n):
302 """Return the RangeSet that contains at most the first 'n' integers.
303
304 >>> RangeSet("0-9").first(1)
305 <RangeSet("0")>
306 >>> RangeSet("10-19").first(5)
307 <RangeSet("10-14")>
308 >>> RangeSet("10-19").first(15)
309 <RangeSet("10-19")>
310 >>> RangeSet("10-19 30-39").first(3)
311 <RangeSet("10-12")>
312 >>> RangeSet("10-19 30-39").first(15)
313 <RangeSet("10-19 30-34")>
314 >>> RangeSet("10-19 30-39").first(30)
315 <RangeSet("10-19 30-39")>
316 >>> RangeSet("0-9").first(0)
317 <RangeSet("")>
318 """
319
320 if self.size() <= n:
321 return self
322
323 out = []
324 for s, e in self:
325 if e - s >= n:
326 out += (s, s+n)
327 break
328 else:
329 out += (s, e)
330 n -= e - s
331 return RangeSet(data=out)
332
Tao Bao08c85832016-09-19 22:26:30 -0700333 def next_item(self):
334 """Return the next integer represented by the RangeSet.
335
336 >>> list(RangeSet("0-9").next_item())
337 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
338 >>> list(RangeSet("10-19 3-5").next_item())
339 [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Tao Baoc0dcbd02017-11-02 12:19:36 -0700340 >>> list(RangeSet("10-19 3 5 7").next_item())
Tao Bao08c85832016-09-19 22:26:30 -0700341 [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
342 """
343 for s, e in self:
344 for element in range(s, e):
345 yield element
346
Doug Zongker62338182014-09-08 08:29:55 -0700347
348if __name__ == "__main__":
349 import doctest
350 doctest.testmod()