blob: 36becf419214f83a9fb7bcb882bd8e3b17cb4a2a [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
Tao Baofe97dbd2018-02-06 15:01:54 -080016
Doug Zongkerfc44a512014-08-26 13:10:25 -070017import heapq
18import itertools
19
Tao Baofe97dbd2018-02-06 15:01:54 -080020
Doug Zongkerfc44a512014-08-26 13:10:25 -070021__all__ = ["RangeSet"]
22
Tao Baofe97dbd2018-02-06 15:01:54 -080023
Doug Zongkerfc44a512014-08-26 13:10:25 -070024class RangeSet(object):
Tao Baofe97dbd2018-02-06 15:01:54 -080025 """A RangeSet represents a set of non-overlapping ranges on integers.
26
27 Attributes:
28 monotonic: Whether the input has all its integers in increasing order.
29 extra: A dict that can be used by the caller, e.g. to store info that's
30 only meaningful to caller.
31 """
Doug Zongkerfc44a512014-08-26 13:10:25 -070032
33 def __init__(self, data=None):
Dan Albert8b72aef2015-03-23 19:13:21 -070034 self.monotonic = False
Tao Baoc765cca2018-01-31 17:32:40 -080035 self._extra = {}
Doug Zongker846cb3a2014-09-09 10:55:36 -070036 if isinstance(data, str):
37 self._parse_internal(data)
38 elif data:
Tao Baoe8f75612015-08-26 17:07:14 -070039 assert len(data) % 2 == 0
Doug Zongkerfc44a512014-08-26 13:10:25 -070040 self.data = tuple(self._remove_pairs(data))
Tao Baoe8f75612015-08-26 17:07:14 -070041 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
Doug Zongkerfc44a512014-08-26 13:10:25 -070042 else:
43 self.data = ()
44
45 def __iter__(self):
46 for i in range(0, len(self.data), 2):
47 yield self.data[i:i+2]
48
49 def __eq__(self, other):
50 return self.data == other.data
Tao Baoe8f75612015-08-26 17:07:14 -070051
Doug Zongkerfc44a512014-08-26 13:10:25 -070052 def __ne__(self, other):
53 return self.data != other.data
Tao Baoe8f75612015-08-26 17:07:14 -070054
Doug Zongkerfc44a512014-08-26 13:10:25 -070055 def __nonzero__(self):
56 return bool(self.data)
57
58 def __str__(self):
59 if not self.data:
60 return "empty"
61 else:
62 return self.to_string()
63
Doug Zongker846cb3a2014-09-09 10:55:36 -070064 def __repr__(self):
65 return '<RangeSet("' + self.to_string() + '")>'
66
Tao Baoc765cca2018-01-31 17:32:40 -080067 @property
68 def extra(self):
69 return self._extra
70
Doug Zongkerfc44a512014-08-26 13:10:25 -070071 @classmethod
72 def parse(cls, text):
Tao Baofe97dbd2018-02-06 15:01:54 -080073 """Parses a text string into a RangeSet.
Doug Zongkerfc44a512014-08-26 13:10:25 -070074
Tao Baofe97dbd2018-02-06 15:01:54 -080075 The input text string consists of a space-separated list of blocks and
76 ranges, e.g. "10-20 30 35-40". Ranges are interpreted to include both their
77 ends (so the above example represents 18 individual blocks). Returns a
78 RangeSet object.
79
80 If the input has all its blocks in increasing order, then the 'monotonic'
81 attribute of the returned RangeSet will be set to True. For example the
82 input "10-20 30" is monotonic, but the input "15-20 30 10-14" is not, even
83 though they represent the same set of blocks (and the two RangeSets will
84 compare equal with ==).
Doug Zongkerfc44a512014-08-26 13:10:25 -070085 """
Doug Zongker846cb3a2014-09-09 10:55:36 -070086 return cls(text)
Doug Zongkerfc44a512014-08-26 13:10:25 -070087
Tao Bao8179d682016-03-24 11:08:51 -070088 @classmethod
89 def parse_raw(cls, text):
90 """Parse a string generated by RangeSet.to_string_raw().
91
92 >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw())
93 <RangeSet("0-9")>
94 """
95
96 raw = [int(i) for i in text.split(',')]
97 assert raw[0] == len(raw[1:]), "Invalid raw string."
98
99 return cls(data=raw[1:])
100
Doug Zongker846cb3a2014-09-09 10:55:36 -0700101 def _parse_internal(self, text):
Doug Zongkerfc44a512014-08-26 13:10:25 -0700102 data = []
103 last = -1
104 monotonic = True
105 for p in text.split():
106 if "-" in p:
Tao Baoe8f75612015-08-26 17:07:14 -0700107 s, e = (int(x) for x in p.split("-"))
108 data.append(s)
109 data.append(e+1)
Doug Zongkerfc44a512014-08-26 13:10:25 -0700110 if last <= s <= e:
111 last = e
112 else:
113 monotonic = False
114 else:
115 s = int(p)
116 data.append(s)
117 data.append(s+1)
118 if last <= s:
119 last = s+1
120 else:
Tianjie Xucd1e16a2016-04-07 20:17:48 -0700121 monotonic = False
Doug Zongkerfc44a512014-08-26 13:10:25 -0700122 data.sort()
Doug Zongker846cb3a2014-09-09 10:55:36 -0700123 self.data = tuple(self._remove_pairs(data))
124 self.monotonic = monotonic
Doug Zongkerfc44a512014-08-26 13:10:25 -0700125
126 @staticmethod
127 def _remove_pairs(source):
Tao Baoe8f75612015-08-26 17:07:14 -0700128 """Remove consecutive duplicate items to simplify the result.
129
130 [1, 2, 2, 5, 5, 10] will become [1, 10]."""
Doug Zongkerfc44a512014-08-26 13:10:25 -0700131 last = None
132 for i in source:
133 if i == last:
134 last = None
135 else:
136 if last is not None:
137 yield last
138 last = i
139 if last is not None:
140 yield last
141
142 def to_string(self):
143 out = []
144 for i in range(0, len(self.data), 2):
145 s, e = self.data[i:i+2]
146 if e == s+1:
147 out.append(str(s))
148 else:
149 out.append(str(s) + "-" + str(e-1))
150 return " ".join(out)
151
152 def to_string_raw(self):
Tao Baoe8f75612015-08-26 17:07:14 -0700153 assert self.data
Doug Zongkerfc44a512014-08-26 13:10:25 -0700154 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
155
156 def union(self, other):
157 """Return a new RangeSet representing the union of this RangeSet
Doug Zongker846cb3a2014-09-09 10:55:36 -0700158 with the argument.
159
160 >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
161 <RangeSet("10-34")>
162 >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
163 <RangeSet("10-19 22 30-34")>
164 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700165 out = []
166 z = 0
167 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
168 zip(other.data, itertools.cycle((+1, -1)))):
169 if (z == 0 and d == 1) or (z == 1 and d == -1):
170 out.append(p)
171 z += d
172 return RangeSet(data=out)
173
174 def intersect(self, other):
175 """Return a new RangeSet representing the intersection of this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700176 RangeSet with the argument.
177
178 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
179 <RangeSet("18-19 30-32")>
180 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
181 <RangeSet("")>
182 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700183 out = []
184 z = 0
185 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
186 zip(other.data, itertools.cycle((+1, -1)))):
187 if (z == 1 and d == 1) or (z == 2 and d == -1):
188 out.append(p)
189 z += d
190 return RangeSet(data=out)
191
192 def subtract(self, other):
193 """Return a new RangeSet representing subtracting the argument
Doug Zongker846cb3a2014-09-09 10:55:36 -0700194 from this RangeSet.
195
196 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
197 <RangeSet("10-17 33-34")>
198 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
199 <RangeSet("10-19 30-34")>
200 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700201
202 out = []
203 z = 0
204 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
205 zip(other.data, itertools.cycle((-1, +1)))):
206 if (z == 0 and d == 1) or (z == 1 and d == -1):
207 out.append(p)
208 z += d
209 return RangeSet(data=out)
210
211 def overlaps(self, other):
212 """Returns true if the argument has a nonempty overlap with this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700213 RangeSet.
214
215 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
216 True
217 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
218 False
219 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700220
221 # This is like intersect, but we can stop as soon as we discover the
222 # output is going to be nonempty.
223 z = 0
Dan Albert8b72aef2015-03-23 19:13:21 -0700224 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
Doug Zongkerfc44a512014-08-26 13:10:25 -0700225 zip(other.data, itertools.cycle((+1, -1)))):
226 if (z == 1 and d == 1) or (z == 2 and d == -1):
227 return True
228 z += d
229 return False
230
231 def size(self):
232 """Returns the total size of the RangeSet (ie, how many integers
Doug Zongker846cb3a2014-09-09 10:55:36 -0700233 are in the set).
234
235 >>> RangeSet("10-19 30-34").size()
236 15
237 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700238
239 total = 0
240 for i, p in enumerate(self.data):
241 if i % 2:
242 total += p
243 else:
244 total -= p
245 return total
Doug Zongker62338182014-09-08 08:29:55 -0700246
247 def map_within(self, other):
248 """'other' should be a subset of 'self'. Returns a RangeSet
249 representing what 'other' would get translated to if the integers
250 of 'self' were translated down to be contiguous starting at zero.
251
Doug Zongker846cb3a2014-09-09 10:55:36 -0700252 >>> RangeSet("0-9").map_within(RangeSet("3-4"))
253 <RangeSet("3-4")>
254 >>> RangeSet("10-19").map_within(RangeSet("13-14"))
255 <RangeSet("3-4")>
256 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
257 <RangeSet("7-12")>
258 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
259 <RangeSet("2-3 7-12")>
Doug Zongker62338182014-09-08 08:29:55 -0700260 """
261
262 out = []
263 offset = 0
264 start = None
265 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
266 zip(other.data, itertools.cycle((-1, +1)))):
267 if d == -5:
268 start = p
269 elif d == +5:
270 offset += p-start
271 start = None
272 else:
273 out.append(offset + p - start)
274 return RangeSet(data=out)
275
Tao Baoe9b61912015-07-09 17:37:49 -0700276 def extend(self, n):
277 """Extend the RangeSet by 'n' blocks.
278
279 The lower bound is guaranteed to be non-negative.
280
281 >>> RangeSet("0-9").extend(1)
282 <RangeSet("0-10")>
283 >>> RangeSet("10-19").extend(15)
284 <RangeSet("0-34")>
285 >>> RangeSet("10-19 30-39").extend(4)
286 <RangeSet("6-23 26-43")>
287 >>> RangeSet("10-19 30-39").extend(10)
288 <RangeSet("0-49")>
289 """
290 out = self
291 for i in range(0, len(self.data), 2):
292 s, e = self.data[i:i+2]
293 s1 = max(0, s - n)
294 e1 = e + n
295 out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
296 return out
297
Tao Bao9a5caf22015-08-25 15:10:10 -0700298 def first(self, n):
299 """Return the RangeSet that contains at most the first 'n' integers.
300
301 >>> RangeSet("0-9").first(1)
302 <RangeSet("0")>
303 >>> RangeSet("10-19").first(5)
304 <RangeSet("10-14")>
305 >>> RangeSet("10-19").first(15)
306 <RangeSet("10-19")>
307 >>> RangeSet("10-19 30-39").first(3)
308 <RangeSet("10-12")>
309 >>> RangeSet("10-19 30-39").first(15)
310 <RangeSet("10-19 30-34")>
311 >>> RangeSet("10-19 30-39").first(30)
312 <RangeSet("10-19 30-39")>
313 >>> RangeSet("0-9").first(0)
314 <RangeSet("")>
315 """
316
317 if self.size() <= n:
318 return self
319
320 out = []
321 for s, e in self:
322 if e - s >= n:
323 out += (s, s+n)
324 break
325 else:
326 out += (s, e)
327 n -= e - s
328 return RangeSet(data=out)
329
Tao Bao08c85832016-09-19 22:26:30 -0700330 def next_item(self):
331 """Return the next integer represented by the RangeSet.
332
333 >>> list(RangeSet("0-9").next_item())
334 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
335 >>> list(RangeSet("10-19 3-5").next_item())
336 [3, 4, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
Tao Baoc0dcbd02017-11-02 12:19:36 -0700337 >>> list(RangeSet("10-19 3 5 7").next_item())
Tao Bao08c85832016-09-19 22:26:30 -0700338 [3, 5, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
339 """
340 for s, e in self:
341 for element in range(s, e):
342 yield element
343
Doug Zongker62338182014-09-08 08:29:55 -0700344
345if __name__ == "__main__":
346 import doctest
347 doctest.testmod()