blob: 1506658f33c28dd3ee60414b50f279f13d89a239 [file] [log] [blame]
Doug Zongker424296a2014-09-02 08:53:09 -07001# Copyright (C) 2014 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14
Doug Zongkerfc44a512014-08-26 13:10:25 -070015from __future__ import print_function
16import heapq
17import itertools
18
19__all__ = ["RangeSet"]
20
21class RangeSet(object):
22 """A RangeSet represents a set of nonoverlapping ranges on the
23 integers (ie, a set of integers, but efficient when the set contains
24 lots of runs."""
25
26 def __init__(self, data=None):
Dan Albert8b72aef2015-03-23 19:13:21 -070027 self.monotonic = False
Doug Zongker846cb3a2014-09-09 10:55:36 -070028 if isinstance(data, str):
29 self._parse_internal(data)
30 elif data:
Doug Zongkerfc44a512014-08-26 13:10:25 -070031 self.data = tuple(self._remove_pairs(data))
32 else:
33 self.data = ()
34
35 def __iter__(self):
36 for i in range(0, len(self.data), 2):
37 yield self.data[i:i+2]
38
39 def __eq__(self, other):
40 return self.data == other.data
41 def __ne__(self, other):
42 return self.data != other.data
43 def __nonzero__(self):
44 return bool(self.data)
45
46 def __str__(self):
47 if not self.data:
48 return "empty"
49 else:
50 return self.to_string()
51
Doug Zongker846cb3a2014-09-09 10:55:36 -070052 def __repr__(self):
53 return '<RangeSet("' + self.to_string() + '")>'
54
Doug Zongkerfc44a512014-08-26 13:10:25 -070055 @classmethod
56 def parse(cls, text):
57 """Parse a text string consisting of a space-separated list of
58 blocks and ranges, eg "10-20 30 35-40". Ranges are interpreted to
59 include both their ends (so the above example represents 18
60 individual blocks. Returns a RangeSet object.
61
62 If the input has all its blocks in increasing order, then returned
63 RangeSet will have an extra attribute 'monotonic' that is set to
64 True. For example the input "10-20 30" is monotonic, but the input
65 "15-20 30 10-14" is not, even though they represent the same set
66 of blocks (and the two RangeSets will compare equal with ==).
67 """
Doug Zongker846cb3a2014-09-09 10:55:36 -070068 return cls(text)
Doug Zongkerfc44a512014-08-26 13:10:25 -070069
Doug Zongker846cb3a2014-09-09 10:55:36 -070070 def _parse_internal(self, text):
Doug Zongkerfc44a512014-08-26 13:10:25 -070071 data = []
72 last = -1
73 monotonic = True
74 for p in text.split():
75 if "-" in p:
76 s, e = p.split("-")
77 data.append(int(s))
78 data.append(int(e)+1)
79 if last <= s <= e:
80 last = e
81 else:
82 monotonic = False
83 else:
84 s = int(p)
85 data.append(s)
86 data.append(s+1)
87 if last <= s:
88 last = s+1
89 else:
90 monotonic = True
91 data.sort()
Doug Zongker846cb3a2014-09-09 10:55:36 -070092 self.data = tuple(self._remove_pairs(data))
93 self.monotonic = monotonic
Doug Zongkerfc44a512014-08-26 13:10:25 -070094
95 @staticmethod
96 def _remove_pairs(source):
97 last = None
98 for i in source:
99 if i == last:
100 last = None
101 else:
102 if last is not None:
103 yield last
104 last = i
105 if last is not None:
106 yield last
107
108 def to_string(self):
109 out = []
110 for i in range(0, len(self.data), 2):
111 s, e = self.data[i:i+2]
112 if e == s+1:
113 out.append(str(s))
114 else:
115 out.append(str(s) + "-" + str(e-1))
116 return " ".join(out)
117
118 def to_string_raw(self):
119 return str(len(self.data)) + "," + ",".join(str(i) for i in self.data)
120
121 def union(self, other):
122 """Return a new RangeSet representing the union of this RangeSet
Doug Zongker846cb3a2014-09-09 10:55:36 -0700123 with the argument.
124
125 >>> RangeSet("10-19 30-34").union(RangeSet("18-29"))
126 <RangeSet("10-34")>
127 >>> RangeSet("10-19 30-34").union(RangeSet("22 32"))
128 <RangeSet("10-19 22 30-34")>
129 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700130 out = []
131 z = 0
132 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
133 zip(other.data, itertools.cycle((+1, -1)))):
134 if (z == 0 and d == 1) or (z == 1 and d == -1):
135 out.append(p)
136 z += d
137 return RangeSet(data=out)
138
139 def intersect(self, other):
140 """Return a new RangeSet representing the intersection of this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700141 RangeSet with the argument.
142
143 >>> RangeSet("10-19 30-34").intersect(RangeSet("18-32"))
144 <RangeSet("18-19 30-32")>
145 >>> RangeSet("10-19 30-34").intersect(RangeSet("22-28"))
146 <RangeSet("")>
147 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700148 out = []
149 z = 0
150 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
151 zip(other.data, itertools.cycle((+1, -1)))):
152 if (z == 1 and d == 1) or (z == 2 and d == -1):
153 out.append(p)
154 z += d
155 return RangeSet(data=out)
156
157 def subtract(self, other):
158 """Return a new RangeSet representing subtracting the argument
Doug Zongker846cb3a2014-09-09 10:55:36 -0700159 from this RangeSet.
160
161 >>> RangeSet("10-19 30-34").subtract(RangeSet("18-32"))
162 <RangeSet("10-17 33-34")>
163 >>> RangeSet("10-19 30-34").subtract(RangeSet("22-28"))
164 <RangeSet("10-19 30-34")>
165 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700166
167 out = []
168 z = 0
169 for p, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
170 zip(other.data, itertools.cycle((-1, +1)))):
171 if (z == 0 and d == 1) or (z == 1 and d == -1):
172 out.append(p)
173 z += d
174 return RangeSet(data=out)
175
176 def overlaps(self, other):
177 """Returns true if the argument has a nonempty overlap with this
Doug Zongker846cb3a2014-09-09 10:55:36 -0700178 RangeSet.
179
180 >>> RangeSet("10-19 30-34").overlaps(RangeSet("18-32"))
181 True
182 >>> RangeSet("10-19 30-34").overlaps(RangeSet("22-28"))
183 False
184 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700185
186 # This is like intersect, but we can stop as soon as we discover the
187 # output is going to be nonempty.
188 z = 0
Dan Albert8b72aef2015-03-23 19:13:21 -0700189 for _, d in heapq.merge(zip(self.data, itertools.cycle((+1, -1))),
Doug Zongkerfc44a512014-08-26 13:10:25 -0700190 zip(other.data, itertools.cycle((+1, -1)))):
191 if (z == 1 and d == 1) or (z == 2 and d == -1):
192 return True
193 z += d
194 return False
195
196 def size(self):
197 """Returns the total size of the RangeSet (ie, how many integers
Doug Zongker846cb3a2014-09-09 10:55:36 -0700198 are in the set).
199
200 >>> RangeSet("10-19 30-34").size()
201 15
202 """
Doug Zongkerfc44a512014-08-26 13:10:25 -0700203
204 total = 0
205 for i, p in enumerate(self.data):
206 if i % 2:
207 total += p
208 else:
209 total -= p
210 return total
Doug Zongker62338182014-09-08 08:29:55 -0700211
212 def map_within(self, other):
213 """'other' should be a subset of 'self'. Returns a RangeSet
214 representing what 'other' would get translated to if the integers
215 of 'self' were translated down to be contiguous starting at zero.
216
Doug Zongker846cb3a2014-09-09 10:55:36 -0700217 >>> RangeSet("0-9").map_within(RangeSet("3-4"))
218 <RangeSet("3-4")>
219 >>> RangeSet("10-19").map_within(RangeSet("13-14"))
220 <RangeSet("3-4")>
221 >>> RangeSet("10-19 30-39").map_within(RangeSet("17-19 30-32"))
222 <RangeSet("7-12")>
223 >>> RangeSet("10-19 30-39").map_within(RangeSet("12-13 17-19 30-32"))
224 <RangeSet("2-3 7-12")>
Doug Zongker62338182014-09-08 08:29:55 -0700225 """
226
227 out = []
228 offset = 0
229 start = None
230 for p, d in heapq.merge(zip(self.data, itertools.cycle((-5, +5))),
231 zip(other.data, itertools.cycle((-1, +1)))):
232 if d == -5:
233 start = p
234 elif d == +5:
235 offset += p-start
236 start = None
237 else:
238 out.append(offset + p - start)
239 return RangeSet(data=out)
240
Tao Baoe9b61912015-07-09 17:37:49 -0700241 def extend(self, n):
242 """Extend the RangeSet by 'n' blocks.
243
244 The lower bound is guaranteed to be non-negative.
245
246 >>> RangeSet("0-9").extend(1)
247 <RangeSet("0-10")>
248 >>> RangeSet("10-19").extend(15)
249 <RangeSet("0-34")>
250 >>> RangeSet("10-19 30-39").extend(4)
251 <RangeSet("6-23 26-43")>
252 >>> RangeSet("10-19 30-39").extend(10)
253 <RangeSet("0-49")>
254 """
255 out = self
256 for i in range(0, len(self.data), 2):
257 s, e = self.data[i:i+2]
258 s1 = max(0, s - n)
259 e1 = e + n
260 out = out.union(RangeSet(str(s1) + "-" + str(e1-1)))
261 return out
262
Doug Zongker62338182014-09-08 08:29:55 -0700263
264if __name__ == "__main__":
265 import doctest
266 doctest.testmod()