blob: 31ed83a67bc8be5c7ecf979bb7355f47273b2ac1 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22 """A RangeSet represents a set of nonoverlapping ranges on the
23 integers (ie, a set of integers, but efficient when the set contains
24 lots of runs."""
25
26 def __init__(self, data=None):
Dan Albert8b72aef2015-03-23 19:13:21 -070027 self.monotonic = False
Doug Zongker846cb3a2014-09-09 10:55:36 -070028 if isinstance(data, str):
29 self._parse_internal(data)
30 elif data:
Tao Baoe8f75612015-08-26 17:07:14 -070031 assert len(data) % 2 == 0
Doug Zongkerfc44a512014-08-26 13:10:25 -070032 self.data = tuple(self._remove_pairs(data))
Tao Baoe8f75612015-08-26 17:07:14 -070033 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
Doug Zongkerfc44a512014-08-26 13:10:25 -070034 else:
35 self.data = ()
36
37 def __iter__(self):
38 for i in range(0, len(self.data), 2):
39 yield self.data[i:i+2]
40
41 def __eq__(self, other):
42 return self.data == other.data
Tao Baoe8f75612015-08-26 17:07:14 -070043
Doug Zongkerfc44a512014-08-26 13:10:25 -070044 def __ne__(self, other):
45 return self.data != other.data
Tao Baoe8f75612015-08-26 17:07:14 -070046
Doug Zongkerfc44a512014-08-26 13:10:25 -070047 def __nonzero__(self):
48 return bool(self.data)
49
50 def __str__(self):
51 if not self.data:
52 return "empty"
53 else:
54 return self.to_string()
55
Doug Zongker846cb3a2014-09-09 10:55:36 -070056 def __repr__(self):
57 return '<RangeSet("' + self.to_string() + '")>'
58
Doug Zongkerfc44a512014-08-26 13:10:25 -070059 @classmethod
60 def parse(cls, text):
61 """Parse a text string consisting of a space-separated list of
62 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to
63 include both their ends (so the above example represents 18
64 individual blocks. Returns a RangeSet object.
65
66 If the input has all its blocks in increasing order, then returned
67 RangeSet will have an extra attribute 'monotonic' that is set to
68 True. For example the input "10-20 30" is monotonic, but the input
69 "15-20 30 10-14" is not, even though they represent the same set
70 of blocks (and the two RangeSets will compare equal with ==).
71 """
Doug Zongker846cb3a2014-09-09 10:55:36 -070072 return cls(text)
Doug Zongkerfc44a512014-08-26 13:10:25 -070073
Tao Bao8179d682016-03-24 11:08:51 -070074 @classmethod
75 def parse_raw(cls, text):
76 """Parse a string generated by RangeSet.to_string_raw().
77
78 >>> RangeSet.parse_raw(RangeSet("0-9").to_string_raw())
79 <RangeSet("0-9")>
80 """
81
82 raw = [int(i) for i in text.split(',')]
83 assert raw[0] == len(raw[1:]), "Invalid raw string."
84
85 return cls(data=raw[1:])
86
Doug Zongker846cb3a2014-09-09 10:55:36 -070087 def _parse_internal(self, text):
Doug Zongkerfc44a512014-08-26 13:10:25 -070088 data = []
89 last = -1
90 monotonic = True
91 for p in text.split():
92 if "-" in p:
Tao Baoe8f75612015-08-26 17:07:14 -070093 s, e = (int(x) for x in p.split("-"))
94 data.append(s)
95 data.append(e+1)
Doug Zongkerfc44a512014-08-26 13:10:25 -070096 if last <= s <= e:
97 last = e
98 else:
99 monotonic = False
100 else:
101 s = int(p)
102 data.append(s)
103 data.append(s+1)
104 if last <= s:
105 last = s+1
106 else:
107 monotonic = True
108 data.sort()
Doug Zongker846cb3a2014-09-09 10:55:36 -0700109 self.data = tuple(self._remove_pairs(data))
110 self.monotonic = monotonic
Doug Zongkerfc44a512014-08-26 13:10:25 -0700111
112 @staticmethod
113 def _remove_pairs(source):
Tao Baoe8f75612015-08-26 17:07:14 -0700114 """Remove consecutive duplicate items to simplify the result.
115
116 [1, 2, 2, 5, 5, 10] will become [1, 10]."""
Doug Zongkerfc44a512014-08-26 13:10:25 -0700117 last = None
118 for i in source:
119 if i == last:
120 last = None
121 else:
122 if last is not None:
123 yield last
124 last = i
125 if last is not None:
126 yield last
127
128 def to_string(self):
129 out = []
130 for i in range(0, len(self.data), 2):
131 s, e = self.data[i:i+2]
132 if e == s+1:
133 out.append(str(s))
134 else:
135 out.append(str(s) + "-" + str(e-1))
136 return " ".join(out)
137
138 def to_string_raw(self):
Tao Baoe8f75612015-08-26 17:07:14 -0700139 assert self.data
Doug Zongkerfc44a512014-08-26 13:10:25 -0700140 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
141
142 def union(self, other):
143 """Return a new RangeSet representing the union of this RangeSet
Doug Zongker846cb3a2014-09-09 10:55:36 -0700144 with the argument.
145
146 >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
147 <RangeSet("10-34")>
148 >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
149 <RangeSet("10-19 22 30-34")>
150 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700151 out = []
152 z = 0
153 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
154 zip(other.data, itertools.cycle((+1, -1)))):
155 if (z == 0 and d == 1) or (z == 1 and d == -1):
156 out.append(p)
157 z += d
158 return RangeSet(data=out)
159
160 def intersect(self, other):
161 """Return a new RangeSet representing the intersection of this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700162 RangeSet with the argument.
163
164 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
165 <RangeSet("18-19 30-32")>
166 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
167 <RangeSet("")>
168 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700169 out = []
170 z = 0
171 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
172 zip(other.data, itertools.cycle((+1, -1)))):
173 if (z == 1 and d == 1) or (z == 2 and d == -1):
174 out.append(p)
175 z += d
176 return RangeSet(data=out)
177
178 def subtract(self, other):
179 """Return a new RangeSet representing subtracting the argument
Doug Zongker846cb3a2014-09-09 10:55:36 -0700180 from this RangeSet.
181
182 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
183 <RangeSet("10-17 33-34")>
184 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
185 <RangeSet("10-19 30-34")>
186 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700187
188 out = []
189 z = 0
190 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
191 zip(other.data, itertools.cycle((-1, +1)))):
192 if (z == 0 and d == 1) or (z == 1 and d == -1):
193 out.append(p)
194 z += d
195 return RangeSet(data=out)
196
197 def overlaps(self, other):
198 """Returns true if the argument has a nonempty overlap with this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700199 RangeSet.
200
201 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
202 True
203 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
204 False
205 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700206
207 # This is like intersect, but we can stop as soon as we discover the
208 # output is going to be nonempty.
209 z = 0
Dan Albert8b72aef2015-03-23 19:13:21 -0700210 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
Doug Zongkerfc44a512014-08-26 13:10:25 -0700211 zip(other.data, itertools.cycle((+1, -1)))):
212 if (z == 1 and d == 1) or (z == 2 and d == -1):
213 return True
214 z += d
215 return False
216
217 def size(self):
218 """Returns the total size of the RangeSet (ie, how many integers
Doug Zongker846cb3a2014-09-09 10:55:36 -0700219 are in the set).
220
221 >>> RangeSet("10-19 30-34").size()
222 15
223 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700224
225 total = 0
226 for i, p in enumerate(self.data):
227 if i % 2:
228 total += p
229 else:
230 total -= p
231 return total
Doug Zongker62338182014-09-08 08:29:55 -0700232
233 def map_within(self, other):
234 """'other' should be a subset of 'self'. Returns a RangeSet
235 representing what 'other' would get translated to if the integers
236 of 'self' were translated down to be contiguous starting at zero.
237
Doug Zongker846cb3a2014-09-09 10:55:36 -0700238 >>> RangeSet("0-9").map_within(RangeSet("3-4"))
239 <RangeSet("3-4")>
240 >>> RangeSet("10-19").map_within(RangeSet("13-14"))
241 <RangeSet("3-4")>
242 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
243 <RangeSet("7-12")>
244 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
245 <RangeSet("2-3 7-12")>
Doug Zongker62338182014-09-08 08:29:55 -0700246 """
247
248 out = []
249 offset = 0
250 start = None
251 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
252 zip(other.data, itertools.cycle((-1, +1)))):
253 if d == -5:
254 start = p
255 elif d == +5:
256 offset += p-start
257 start = None
258 else:
259 out.append(offset + p - start)
260 return RangeSet(data=out)
261
Tao Baoe9b61912015-07-09 17:37:49 -0700262 def extend(self, n):
263 """Extend the RangeSet by 'n' blocks.
264
265 The lower bound is guaranteed to be non-negative.
266
267 >>> RangeSet("0-9").extend(1)
268 <RangeSet("0-10")>
269 >>> RangeSet("10-19").extend(15)
270 <RangeSet("0-34")>
271 >>> RangeSet("10-19 30-39").extend(4)
272 <RangeSet("6-23 26-43")>
273 >>> RangeSet("10-19 30-39").extend(10)
274 <RangeSet("0-49")>
275 """
276 out = self
277 for i in range(0, len(self.data), 2):
278 s, e = self.data[i:i+2]
279 s1 = max(0, s - n)
280 e1 = e + n
281 out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
282 return out
283
Tao Bao9a5caf22015-08-25 15:10:10 -0700284 def first(self, n):
285 """Return the RangeSet that contains at most the first 'n' integers.
286
287 >>> RangeSet("0-9").first(1)
288 <RangeSet("0")>
289 >>> RangeSet("10-19").first(5)
290 <RangeSet("10-14")>
291 >>> RangeSet("10-19").first(15)
292 <RangeSet("10-19")>
293 >>> RangeSet("10-19 30-39").first(3)
294 <RangeSet("10-12")>
295 >>> RangeSet("10-19 30-39").first(15)
296 <RangeSet("10-19 30-34")>
297 >>> RangeSet("10-19 30-39").first(30)
298 <RangeSet("10-19 30-39")>
299 >>> RangeSet("0-9").first(0)
300 <RangeSet("")>
301 """
302
303 if self.size() <= n:
304 return self
305
306 out = []
307 for s, e in self:
308 if e - s >= n:
309 out += (s, s+n)
310 break
311 else:
312 out += (s, e)
313 n -= e - s
314 return RangeSet(data=out)
315
Doug Zongker62338182014-09-08 08:29:55 -0700316
317if __name__ == "__main__":
318 import doctest
319 doctest.testmod()