blob: 975a48a4561aeea9ef0ebcd949ee941d499a917d [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22 """A RangeSet represents a set of nonoverlapping ranges on the
23 integers (ie, a set of integers, but efficient when the set contains
24 lots of runs."""
25
26 def __init__(self, data=None):
Dan Albert8b72aef2015-03-23 19:13:21 -070027 self.monotonic = False
Doug Zongker846cb3a2014-09-09 10:55:36 -070028 if isinstance(data, str):
29 self._parse_internal(data)
30 elif data:
Tao Baoe8f75612015-08-26 17:07:14 -070031 assert len(data) % 2 == 0
Doug Zongkerfc44a512014-08-26 13:10:25 -070032 self.data = tuple(self._remove_pairs(data))
Tao Baoe8f75612015-08-26 17:07:14 -070033 self.monotonic = all(x < y for x, y in zip(self.data, self.data[1:]))
Doug Zongkerfc44a512014-08-26 13:10:25 -070034 else:
35 self.data = ()
36
37 def __iter__(self):
38 for i in range(0, len(self.data), 2):
39 yield self.data[i:i+2]
40
41 def __eq__(self, other):
42 return self.data == other.data
Tao Baoe8f75612015-08-26 17:07:14 -070043
Doug Zongkerfc44a512014-08-26 13:10:25 -070044 def __ne__(self, other):
45 return self.data != other.data
Tao Baoe8f75612015-08-26 17:07:14 -070046
Doug Zongkerfc44a512014-08-26 13:10:25 -070047 def __nonzero__(self):
48 return bool(self.data)
49
50 def __str__(self):
51 if not self.data:
52 return "empty"
53 else:
54 return self.to_string()
55
Doug Zongker846cb3a2014-09-09 10:55:36 -070056 def __repr__(self):
57 return '<RangeSet("' + self.to_string() + '")>'
58
Doug Zongkerfc44a512014-08-26 13:10:25 -070059 @classmethod
60 def parse(cls, text):
61 """Parse a text string consisting of a space-separated list of
62 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to
63 include both their ends (so the above example represents 18
64 individual blocks. Returns a RangeSet object.
65
66 If the input has all its blocks in increasing order, then returned
67 RangeSet will have an extra attribute 'monotonic' that is set to
68 True. For example the input "10-20 30" is monotonic, but the input
69 "15-20 30 10-14" is not, even though they represent the same set
70 of blocks (and the two RangeSets will compare equal with ==).
71 """
Doug Zongker846cb3a2014-09-09 10:55:36 -070072 return cls(text)
Doug Zongkerfc44a512014-08-26 13:10:25 -070073
Doug Zongker846cb3a2014-09-09 10:55:36 -070074 def _parse_internal(self, text):
Doug Zongkerfc44a512014-08-26 13:10:25 -070075 data = []
76 last = -1
77 monotonic = True
78 for p in text.split():
79 if "-" in p:
Tao Baoe8f75612015-08-26 17:07:14 -070080 s, e = (int(x) for x in p.split("-"))
81 data.append(s)
82 data.append(e+1)
Doug Zongkerfc44a512014-08-26 13:10:25 -070083 if last <= s <= e:
84 last = e
85 else:
86 monotonic = False
87 else:
88 s = int(p)
89 data.append(s)
90 data.append(s+1)
91 if last <= s:
92 last = s+1
93 else:
94 monotonic = True
95 data.sort()
Doug Zongker846cb3a2014-09-09 10:55:36 -070096 self.data = tuple(self._remove_pairs(data))
97 self.monotonic = monotonic
Doug Zongkerfc44a512014-08-26 13:10:25 -070098
99 @staticmethod
100 def _remove_pairs(source):
Tao Baoe8f75612015-08-26 17:07:14 -0700101 """Remove consecutive duplicate items to simplify the result.
102
103 [1, 2, 2, 5, 5, 10] will become [1, 10]."""
Doug Zongkerfc44a512014-08-26 13:10:25 -0700104 last = None
105 for i in source:
106 if i == last:
107 last = None
108 else:
109 if last is not None:
110 yield last
111 last = i
112 if last is not None:
113 yield last
114
115 def to_string(self):
116 out = []
117 for i in range(0, len(self.data), 2):
118 s, e = self.data[i:i+2]
119 if e == s+1:
120 out.append(str(s))
121 else:
122 out.append(str(s) + "-" + str(e-1))
123 return " ".join(out)
124
125 def to_string_raw(self):
Tao Baoe8f75612015-08-26 17:07:14 -0700126 assert self.data
Doug Zongkerfc44a512014-08-26 13:10:25 -0700127 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
128
129 def union(self, other):
130 """Return a new RangeSet representing the union of this RangeSet
Doug Zongker846cb3a2014-09-09 10:55:36 -0700131 with the argument.
132
133 >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
134 <RangeSet("10-34")>
135 >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
136 <RangeSet("10-19 22 30-34")>
137 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700138 out = []
139 z = 0
140 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
141 zip(other.data, itertools.cycle((+1, -1)))):
142 if (z == 0 and d == 1) or (z == 1 and d == -1):
143 out.append(p)
144 z += d
145 return RangeSet(data=out)
146
147 def intersect(self, other):
148 """Return a new RangeSet representing the intersection of this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700149 RangeSet with the argument.
150
151 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
152 <RangeSet("18-19 30-32")>
153 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
154 <RangeSet("")>
155 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700156 out = []
157 z = 0
158 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
159 zip(other.data, itertools.cycle((+1, -1)))):
160 if (z == 1 and d == 1) or (z == 2 and d == -1):
161 out.append(p)
162 z += d
163 return RangeSet(data=out)
164
165 def subtract(self, other):
166 """Return a new RangeSet representing subtracting the argument
Doug Zongker846cb3a2014-09-09 10:55:36 -0700167 from this RangeSet.
168
169 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
170 <RangeSet("10-17 33-34")>
171 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
172 <RangeSet("10-19 30-34")>
173 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700174
175 out = []
176 z = 0
177 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
178 zip(other.data, itertools.cycle((-1, +1)))):
179 if (z == 0 and d == 1) or (z == 1 and d == -1):
180 out.append(p)
181 z += d
182 return RangeSet(data=out)
183
184 def overlaps(self, other):
185 """Returns true if the argument has a nonempty overlap with this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700186 RangeSet.
187
188 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
189 True
190 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
191 False
192 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700193
194 # This is like intersect, but we can stop as soon as we discover the
195 # output is going to be nonempty.
196 z = 0
Dan Albert8b72aef2015-03-23 19:13:21 -0700197 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
Doug Zongkerfc44a512014-08-26 13:10:25 -0700198 zip(other.data, itertools.cycle((+1, -1)))):
199 if (z == 1 and d == 1) or (z == 2 and d == -1):
200 return True
201 z += d
202 return False
203
204 def size(self):
205 """Returns the total size of the RangeSet (ie, how many integers
Doug Zongker846cb3a2014-09-09 10:55:36 -0700206 are in the set).
207
208 >>> RangeSet("10-19 30-34").size()
209 15
210 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700211
212 total = 0
213 for i, p in enumerate(self.data):
214 if i % 2:
215 total += p
216 else:
217 total -= p
218 return total
Doug Zongker62338182014-09-08 08:29:55 -0700219
220 def map_within(self, other):
221 """'other' should be a subset of 'self'. Returns a RangeSet
222 representing what 'other' would get translated to if the integers
223 of 'self' were translated down to be contiguous starting at zero.
224
Doug Zongker846cb3a2014-09-09 10:55:36 -0700225 >>> RangeSet("0-9").map_within(RangeSet("3-4"))
226 <RangeSet("3-4")>
227 >>> RangeSet("10-19").map_within(RangeSet("13-14"))
228 <RangeSet("3-4")>
229 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
230 <RangeSet("7-12")>
231 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
232 <RangeSet("2-3 7-12")>
Doug Zongker62338182014-09-08 08:29:55 -0700233 """
234
235 out = []
236 offset = 0
237 start = None
238 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
239 zip(other.data, itertools.cycle((-1, +1)))):
240 if d == -5:
241 start = p
242 elif d == +5:
243 offset += p-start
244 start = None
245 else:
246 out.append(offset + p - start)
247 return RangeSet(data=out)
248
Tao Baoe9b61912015-07-09 17:37:49 -0700249 def extend(self, n):
250 """Extend the RangeSet by 'n' blocks.
251
252 The lower bound is guaranteed to be non-negative.
253
254 >>> RangeSet("0-9").extend(1)
255 <RangeSet("0-10")>
256 >>> RangeSet("10-19").extend(15)
257 <RangeSet("0-34")>
258 >>> RangeSet("10-19 30-39").extend(4)
259 <RangeSet("6-23 26-43")>
260 >>> RangeSet("10-19 30-39").extend(10)
261 <RangeSet("0-49")>
262 """
263 out = self
264 for i in range(0, len(self.data), 2):
265 s, e = self.data[i:i+2]
266 s1 = max(0, s - n)
267 e1 = e + n
268 out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
269 return out
270
Tao Bao9a5caf22015-08-25 15:10:10 -0700271 def first(self, n):
272 """Return the RangeSet that contains at most the first 'n' integers.
273
274 >>> RangeSet("0-9").first(1)
275 <RangeSet("0")>
276 >>> RangeSet("10-19").first(5)
277 <RangeSet("10-14")>
278 >>> RangeSet("10-19").first(15)
279 <RangeSet("10-19")>
280 >>> RangeSet("10-19 30-39").first(3)
281 <RangeSet("10-12")>
282 >>> RangeSet("10-19 30-39").first(15)
283 <RangeSet("10-19 30-34")>
284 >>> RangeSet("10-19 30-39").first(30)
285 <RangeSet("10-19 30-39")>
286 >>> RangeSet("0-9").first(0)
287 <RangeSet("")>
288 """
289
290 if self.size() <= n:
291 return self
292
293 out = []
294 for s, e in self:
295 if e - s >= n:
296 out += (s, s+n)
297 break
298 else:
299 out += (s, e)
300 n -= e - s
301 return RangeSet(data=out)
302
Doug Zongker62338182014-09-08 08:29:55 -0700303
304if __name__ == "__main__":
305 import doctest
306 doctest.testmod()