from typing import List, Tuple
def discretize_correct(ranges: List[Tuple]) -> List[Tuple]:
"""We are passed a list of tuples each representing a range
and we return a new list of range tuples."""
if not ranges: # Empty list
return []
results = []
MIN_RANGE_NUMBER = min(range[0] for range in ranges)
MAX_RANGE_NUMBER = max(range[1] for range in ranges)
range_size = MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
values = [None] * range_size
for range in ranges:
start, end, value = range
start -= MIN_RANGE_NUMBER
end -= MIN_RANGE_NUMBER
values[start:end+1] = [value] * (end + 1 - start)
results = []
start_idx, last_value = None, None
for idx, value in enumerate(values, start=MIN_RANGE_NUMBER):
if value is None:
if last_value is not None:
resultresults.append((start_idx, idx - 1, last_value))
last_value = None
elif last_value is None:
start_idx = idx
last_value = value
elif value != last_value:
results.append((start_idx, idx - 1, last_value))
start_idx = idx
last_value = value
if last_value is not None:
results.append((start_idx, MAX_RANGE_NUMBER, last_value))
return results
if __name__ == '__main__':
from ast import literal_eval
with open('test.txt') as f:
data = literal_eval(f.read())
ranges = discretize_correct(data)
# Print out first 20 ranges:
for i in range(20):
print(f'{i}: {ranges[i]}')
from typing import List, Tuple
def discretize_correct(ranges: List[Tuple]) -> List[Tuple]:
"""We are passed a list of tuples each representing a range
and we return a new list of range tuples."""
if not ranges: # Empty list
return []
results = []
MIN_RANGE_NUMBER = min(range[0] for range in ranges)
MAX_RANGE_NUMBER = max(range[1] for range in ranges)
range_size = MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
values = [None] * range_size
for range in ranges:
start, end, value = range
start -= MIN_RANGE_NUMBER
end -= MIN_RANGE_NUMBER
values[start:end+1] = [value] * (end + 1 - start)
results = []
start_idx, last_value = None, None
for idx, value in enumerate(values, start=MIN_RANGE_NUMBER):
if value is None:
if last_value is not None:
result.append((start_idx, idx - 1, last_value))
last_value = None
elif last_value is None:
start_idx = idx
last_value = value
elif value != last_value:
results.append((start_idx, idx - 1, last_value))
start_idx = idx
last_value = value
if last_value is not None:
results.append((start_idx, MAX_RANGE_NUMBER, last_value))
return results
if __name__ == '__main__':
from ast import literal_eval
with open('test.txt') as f:
data = literal_eval(f.read())
ranges = discretize_correct(data)
# Print out first 20 ranges:
for i in range(20):
print(f'{i}: {ranges[i]}')
from typing import List, Tuple
def discretize_correct(ranges: List[Tuple]) -> List[Tuple]:
"""We are passed a list of tuples each representing a range
and we return a new list of range tuples."""
if not ranges: # Empty list
return []
results = []
MIN_RANGE_NUMBER = min(range[0] for range in ranges)
MAX_RANGE_NUMBER = max(range[1] for range in ranges)
range_size = MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
values = [None] * range_size
for range in ranges:
start, end, value = range
start -= MIN_RANGE_NUMBER
end -= MIN_RANGE_NUMBER
values[start:end+1] = [value] * (end + 1 - start)
results = []
start_idx, last_value = None, None
for idx, value in enumerate(values, start=MIN_RANGE_NUMBER):
if value is None:
if last_value is not None:
results.append((start_idx, idx - 1, last_value))
last_value = None
elif last_value is None:
start_idx = idx
last_value = value
elif value != last_value:
results.append((start_idx, idx - 1, last_value))
start_idx = idx
last_value = value
if last_value is not None:
results.append((start_idx, MAX_RANGE_NUMBER, last_value))
return results
if __name__ == '__main__':
from ast import literal_eval
with open('test.txt') as f:
data = literal_eval(f.read())
ranges = discretize_correct(data)
# Print out first 20 ranges:
for i in range(20):
print(f'{i}: {ranges[i]}')
I have abandoned my implementation, which I agree does not work on the test case the OP posted. However, I do believe that the OP's code also produces erroneous results. So, first I am replicating here what had beenbelow comments I had made to my answer that highlight what I perceive to be the problem:
To verify what I expected the correct results to be, I implemented code that computes the ranges in an inefficient but hopefully correct way: I allocate a list values
whose size if MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
where MAX_RANGE_NUMBER
would correspond to the maximum value of t[1]
where t
enumerates all the tuples in your test data and MIN_RANGE_NUMBER
is the minimum value of t[0]
. I initialize each element of the list to None
. I then proceed to iterate each tuple in the test data. If a tuple were, for example, (17, 27, 19)
I would assign values[17-MIN_RANGE_NUMBER:27+MIN_RANGE_NUMBER+1] = [19] * 11
. After all ranges have been thus processed it is fairly simple to process the values
list to come up with the non-overlapping ranges. I then print out the first 20 such ranges:
The above verifies that yourthe OP's code and the supposedly correct code posted here both produce results not only different from one another but also from what I believe are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct
function irrelevant, the OP's results still vary from those produced from what the OP says is correct code.
I have abandoned my implementation, which I agree does not work on the test case the OP posted. However, I do believe that the OP's code also produces erroneous results. So, first I am replicating here what had been comments I made to my answer:
To verify what I expected the correct results to be, I implemented code that computes the inefficient way: I allocate a list values
whose size if MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
where MAX_RANGE_NUMBER
would correspond to the maximum value of t[1]
where t
enumerates all the tuples in your test data and MIN_RANGE_NUMBER
is the minimum value of t[0]
. I initialize each element of the list to None
. I then proceed to iterate each tuple in the test data. If a tuple were, for example, (17, 27, 19)
I would assign values[17-MIN_RANGE_NUMBER:27+MIN_RANGE_NUMBER+1] = [19] * 11
. After all ranges have been thus processed it is fairly simple to process the values
list to come up with the non-overlapping ranges. I then print out the first 20 such ranges:
The above verifies that your code and the supposedly correct code posted here both produce results not only different from one another but also from what I believe are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct
function irrelevant, the OP's results still vary from those produced from what the OP says is correct code.
I have abandoned my implementation, which I agree does not work on the test case the OP posted. However, I do believe that the OP's code also produces erroneous results. So first I am replicating below comments I had made to my answer that highlight what I perceive to be the problem:
To verify what I expected the correct results to be, I implemented code that computes the ranges in an inefficient but hopefully correct way: I allocate a list values
whose size if MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
where MAX_RANGE_NUMBER
would correspond to the maximum value of t[1]
where t
enumerates all the tuples in your test data and MIN_RANGE_NUMBER
is the minimum value of t[0]
. I initialize each element of the list to None
. I then proceed to iterate each tuple in the test data. If a tuple were, for example, (17, 27, 19)
I would assign values[17-MIN_RANGE_NUMBER:27+MIN_RANGE_NUMBER+1] = [19] * 11
. After all ranges have been thus processed it is fairly simple to process the values
list to come up with the non-overlapping ranges. I then print out the first 20 such ranges:
The above verifies that the OP's code and the supposedly correct code posted here both produce results not only different from one another but also from what I believe are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct
function irrelevant, the OP's results still vary from those produced from what the OP says is correct code.
It may not be much faster for all possible cases you mayUpdate
I have abandoned my implementation, but it is considerably faster forwhich I agree does not work on the input cases you showedtest case the OP posted. You will need to try this out on other inputsHowever, I do believe that you could expectthe OP's code also produces erroneous results. In any case usingSo, first I am replicating here what had been comments I made to my answer:
I have been trying to modify my code so that I get the same results with the test data you posted here . I printed out the first few ranges in your results and ranges 17 through 19 (origin 0) are
Range(2414, 2573, 14), Range(2574, 2599, 9), Range(2414, 2574, 14)
. That does not look right to me. The algorithm you state is correct, which I have not studied, has the following ranges for elements 17 - 19:(2414, 2574, 14), (2575, 2599, 9), (2600, 2816, 6)
, which is clearly different from what your code produced. The test data has as input[... (2414, 2574, 14), (2574, 2599, 9), ...]
. I would think that the value for key 2574 should be 9 if we apply the dictionary updates in order. This suggests to me that the known "correct" algorithm is not so correct and elements 17 - 19 should be(2414, 2573, 14), (2574, 2599, 9), (2600, 2816, 6)
.
To verify what I expected the correct results to be, I implemented code that computes the inefficient way: I allocate a list heapqvalues
is easier to understandwhose size if (and thusMAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
where MAX_RANGE_NUMBER
would correspond to maintain). The new function is calledthe maximum value of discretize2t[1]
inwhere t
enumerates all the code belowtuples in your test data and returns a listMIN_RANGE_NUMBER
is the minimum value of range tuplest[0]
. It also has the advantage thatI initialize each element of the input does not havelist to be sorted (if you have unsorted input,None
. I then un-comment out the callproceed to iterate each tuple in the test data. If a tuple were, for example, heapq.heapify(17, 27, 19)
). I leavewould assign values[17-MIN_RANGE_NUMBER:27+MIN_RANGE_NUMBER+1] = [19] * 11
. After all ranges have been thus processed it to you as ais fairly simple exercise to modifyprocess the code if you require a list of Rangevalues
instanceslist to come up with the non-overlapping ranges. I then print out the first 20 such ranges:
from collections import deque
class Range:
processed = deque()
stack = deque()
def __init__(self, ini, fin, data) -> None:
self.ini = ini
self.fin = fin
self.data = data
def __repr__(self):
return f'Range({self.ini}, {self.fin}, {self.data})'
def cover(self, fin):
if fin <= self.fin:
return True
def join(self, ini, fin):
if ini <= self.fin + 1:
self.fin = fin
return True
def prepend(self, ini):
if self.ini < ini:
Range.processed.append(Range(self.ini, ini - 1, self.data))
def climb(self, fin):
if self.ini < (ini := fin + 1) <= self.fin:
self.ini = ini
def update(self, ini, fin, data):
if self.ini == ini and self.fin == fin:
self.data = data
return True
def process(self, ini, fin, data):
ignore = past = False
if self.data == data:
ignore = self.cover(fin) or self.join(ini, fin)
elif ini <= self.fin and not (ignore := self.update(ini, fin, data)):
self.prepend(ini)
if not ignore:
if ini > self.fin:
Range.processed.append(self)
Range.stack.pop()
past = True
self.climb(fin)
return not ignore, past
def discretize(ranges):
Range.processed.clear()
Range.stack.clear()
for ini, fin, data in ranges:
if not Range.stack:
Range.stack.append(Range(ini, fin, data))
else:
append, past = Range.stack[-1].process(ini, fin, data)
while past and Range.stack:
_, past = Range.stack[-1].process(ini, fin, data)
if append:
Range.stack.append(Range(ini, fin, data))
Range.processed.extend(reversed(Range.stack))
return Range.processed
##################################################################################
from typing import List, Tuple
import heapq
def discretize2discretize_correct(dataranges: List[Tuple]) -> List[Tuple]:
"""We are passed a list of tuples each representing a range
and we return a new list of range tuples."""
if not dataranges: # Empty list
return []
results = []
# Make copy of passed data, which we do not want to destroy:
rangesMIN_RANGE_NUMBER = data.copymin()
# Call to heapq.heapify only neededrange[0] for unsorted input:
range in #heapq.heapify(ranges) # Turn into a heap
while ranges:
range1MAX_RANGE_NUMBER = heapq.heappopmax(ranges) # Get minimum "range"
if not ranges:range[1] #for Norange morein ranges
results.append(range1) # Add to results
range_size = returnMAX_RANGE_NUMBER results- #MIN_RANGE_NUMBER And+ return
1
range2values = ranges[0] # Get new[None] minimum* rangerange_size
iffor range1[1]range <in range2[0]ranges:
start, end, value = results.append(range1)range
else:
start -= MIN_RANGE_NUMBER
results.append((range1[0], range2[0]end - 1,= range1[2]))MIN_RANGE_NUMBER
values[start:end+1] = [value] heapq.heappush(ranges,* (range2[1]end + 1, range1[1], range1[2]))
if __name__ == '__main__':
import timeit
def compare_results(data: str, results1: List[Range], results2: List[Tuple]) -> None:
print(start)
print(f"Comparing Results Using {data} as Input:")
l1results = len(results1)[]
start_idx, l2last_value = len(results2)
if l1 !=None, l2:None
for idx, value raisein Exceptionenumerate('Results have differentvalues, lengths.'start=MIN_RANGE_NUMBER):
forif ivalue inis range(l1)None:
if (results1[i].ini, results1[i].fin,last_value results1[i].data)is !=not results2[i]None:
raise Exception('Different resultsresult.')
print('Passed!')
data1 = [
(3, 2789, 2681),
append(27, 2349, 2004),
(29, 1790, 2072)start_idx, idx (32,- 15071, 1173last_value),
(35, 768, 3453),
(63, 222, 2108),
(341,last_value 501,= 2938),None
(378, 444, 258),
elif (870,last_value 888,is 1364),None:
(1684, 1693, 880)
]
data2start_idx = [(2518812, 3931503981, 1222922854),idx
(8283077, 3890282434, 2360189310),
(21368257,last_value 3571587114,= 2651809994),value
(24571285, 2875098555, 1506778869),
elif (40433776,value 1751972813,!= 4162881198),last_value:
(60822897, 1413187009, 921758666),
results.append(73435921, 1245663774, 3798627995),
(169616863, 530686872, 1316302364)start_idx, idx (273065748,- 4323632051, 3428529648last_value),
(291266380, 409571968, 4206669862),
(558988640, 574048639, 4272236023),
(619240775,start_idx 631101725,= 1910576108),idx
(727687992, 756241696, 3095538347),
(825700858,last_value 866696475,= 2647033502),value
(853058882, 854026525, 2456782176),
(967641241, 1001077664, 3556790281)
if last_value is not ]
None:
data3 = [(0, 10, 'A'), (100, 200, 'B'), (150, 180, 'C')]
print('discretize(data1):', timeitresults.timeitappend(stmt="discretize(data1)"start_idx, number=100_000MAX_RANGE_NUMBER, globals=globals()last_value))
print('discretize2(data1):', timeit.timeit(stmt="discretize2(data1)", number=100_000,return globals=globals()))results
print()
if __name__ == print('discretize(data2)'__main__':', timeit.timeit(stmt="discretize(data2)", number=100_000, globals=globals()))
print('discretize2(data2):',from timeit.timeit(stmt="discretize2(data2)",ast number=100_000,import globals=globals()))literal_eval
print()
print('discretize(data3):', timeit.timeit(stmt="discretize(data3)", number=100_000, globals=globals()))
with print('discretize2open(data3):', timeit'test.timeit(stmt="discretize2(data3)", number=100_000, globals=globals())txt') as compare_results('data1',f:
data = discretizeliteral_eval(data1),
discretize2f.read(data1))
compare_results('data2',
discretize(data2),
ranges = discretize2discretize_correct(data2data)
# )
Print out first 20 compare_results('data3',ranges:
for i in discretizerange(data320),:
discretize2print(data3)
f'{i}: {ranges[i]}')
discretize(data1)0: (10, 181, 2.0479232)
discretize21: (data1182, 706, 11)
2: 0.9702454
discretize(data2707, 920, 6)
3: 3.2891357999999995
discretize2(data2921, 1018, 7)
4: 1.5896913000000001
discretize(data31019, 1032, 6)
5: 0.36877010000000077(1033, 1200, 2)
discretize26: (data31201, 1204, 6)
7: (1205, 1617, 0.13420890000000085
)
Comparing8: Results(1618, Using1667, data16)
9: as(1668, Input:
Passed!1905, 11)
10: (1906, 1977, 6)
Comparing11: Results(1978, Using2015, data22)
12: as(2016, Input2075, 6)
13: (2076, 2082, 3)
Passed!14: (2083, 2201, 6)
15: (2202, 2320, 10)
Comparing16: Results(2321, Using2413, data36)
17: as(2414, Input2573, 14)
18: (2574, 2599, 9)
Passed!19: (2600, 2816, 6)
If you stick withConclusion
The above verifies that your version,code and the supposedly correct code posted here both produce results not only different from one another but also from what I would suggest that you add a docstringbelieve are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct
function irrelevant, type hints and commentsthe OP's results still vary from those produced from what the OP says is correct code.
Consequently, should the OP's code now be moved to stackoverflow.com for correction before it is re-posted here?
It may not be much faster for all possible cases you may have, but it is considerably faster for the input cases you showed. You will need to try this out on other inputs that you could expect. In any case using a heapq
is easier to understand (and thus to maintain). The new function is called discretize2
in the code below and returns a list of range tuples. It also has the advantage that the input does not have to be sorted (if you have unsorted input, then un-comment out the call to heapq.heapify
). I leave it to you as a simple exercise to modify the code if you require a list of Range
instances:
from collections import deque
class Range:
processed = deque()
stack = deque()
def __init__(self, ini, fin, data) -> None:
self.ini = ini
self.fin = fin
self.data = data
def __repr__(self):
return f'Range({self.ini}, {self.fin}, {self.data})'
def cover(self, fin):
if fin <= self.fin:
return True
def join(self, ini, fin):
if ini <= self.fin + 1:
self.fin = fin
return True
def prepend(self, ini):
if self.ini < ini:
Range.processed.append(Range(self.ini, ini - 1, self.data))
def climb(self, fin):
if self.ini < (ini := fin + 1) <= self.fin:
self.ini = ini
def update(self, ini, fin, data):
if self.ini == ini and self.fin == fin:
self.data = data
return True
def process(self, ini, fin, data):
ignore = past = False
if self.data == data:
ignore = self.cover(fin) or self.join(ini, fin)
elif ini <= self.fin and not (ignore := self.update(ini, fin, data)):
self.prepend(ini)
if not ignore:
if ini > self.fin:
Range.processed.append(self)
Range.stack.pop()
past = True
self.climb(fin)
return not ignore, past
def discretize(ranges):
Range.processed.clear()
Range.stack.clear()
for ini, fin, data in ranges:
if not Range.stack:
Range.stack.append(Range(ini, fin, data))
else:
append, past = Range.stack[-1].process(ini, fin, data)
while past and Range.stack:
_, past = Range.stack[-1].process(ini, fin, data)
if append:
Range.stack.append(Range(ini, fin, data))
Range.processed.extend(reversed(Range.stack))
return Range.processed
##################################################################################
from typing import List, Tuple
import heapq
def discretize2(data: List[Tuple]) -> List[Tuple]:
"""We are passed a list of tuples each representing a range
and we return a new list of range tuples."""
if not data: # Empty list
return []
results = []
# Make copy of passed data, which we do not want to destroy:
ranges = data.copy()
# Call to heapq.heapify only needed for unsorted input:
#heapq.heapify(ranges) # Turn into a heap
while ranges:
range1 = heapq.heappop(ranges) # Get minimum "range"
if not ranges: # No more ranges
results.append(range1) # Add to results
return results # And return
range2 = ranges[0] # Get new minimum range
if range1[1] < range2[0]:
results.append(range1)
else:
results.append((range1[0], range2[0] - 1, range1[2]))
heapq.heappush(ranges, (range2[1] + 1, range1[1], range1[2]))
if __name__ == '__main__':
import timeit
def compare_results(data: str, results1: List[Range], results2: List[Tuple]) -> None:
print()
print(f"Comparing Results Using {data} as Input:")
l1 = len(results1)
l2 = len(results2)
if l1 != l2:
raise Exception('Results have different lengths.')
for i in range(l1):
if (results1[i].ini, results1[i].fin, results1[i].data) != results2[i]:
raise Exception('Different results.')
print('Passed!')
data1 = [
(3, 2789, 2681),
(27, 2349, 2004),
(29, 1790, 2072), (32, 1507, 1173),
(35, 768, 3453),
(63, 222, 2108),
(341, 501, 2938),
(378, 444, 258),
(870, 888, 1364),
(1684, 1693, 880)
]
data2 = [(2518812, 3931503981, 1222922854),
(8283077, 3890282434, 2360189310),
(21368257, 3571587114, 2651809994),
(24571285, 2875098555, 1506778869),
(40433776, 1751972813, 4162881198),
(60822897, 1413187009, 921758666),
(73435921, 1245663774, 3798627995),
(169616863, 530686872, 1316302364), (273065748, 432363205, 3428529648),
(291266380, 409571968, 4206669862),
(558988640, 574048639, 4272236023),
(619240775, 631101725, 1910576108),
(727687992, 756241696, 3095538347),
(825700858, 866696475, 2647033502),
(853058882, 854026525, 2456782176),
(967641241, 1001077664, 3556790281)
]
data3 = [(0, 10, 'A'), (100, 200, 'B'), (150, 180, 'C')]
print('discretize(data1):', timeit.timeit(stmt="discretize(data1)", number=100_000, globals=globals()))
print('discretize2(data1):', timeit.timeit(stmt="discretize2(data1)", number=100_000, globals=globals()))
print()
print('discretize(data2):', timeit.timeit(stmt="discretize(data2)", number=100_000, globals=globals()))
print('discretize2(data2):', timeit.timeit(stmt="discretize2(data2)", number=100_000, globals=globals()))
print()
print('discretize(data3):', timeit.timeit(stmt="discretize(data3)", number=100_000, globals=globals()))
print('discretize2(data3):', timeit.timeit(stmt="discretize2(data3)", number=100_000, globals=globals())) compare_results('data1',
discretize(data1),
discretize2(data1))
compare_results('data2',
discretize(data2),
discretize2(data2)
)
compare_results('data3',
discretize(data3),
discretize2(data3)
)
discretize(data1): 2.0479232
discretize2(data1): 0.9702454
discretize(data2): 3.2891357999999995
discretize2(data2): 1.5896913000000001
discretize(data3): 0.36877010000000077
discretize2(data3): 0.13420890000000085
Comparing Results Using data1 as Input:
Passed!
Comparing Results Using data2 as Input:
Passed!
Comparing Results Using data3 as Input:
Passed!
If you stick with your version, I would suggest that you add a docstring, type hints and comments.
Update
I have abandoned my implementation, which I agree does not work on the test case the OP posted. However, I do believe that the OP's code also produces erroneous results. So, first I am replicating here what had been comments I made to my answer:
I have been trying to modify my code so that I get the same results with the test data you posted here . I printed out the first few ranges in your results and ranges 17 through 19 (origin 0) are
Range(2414, 2573, 14), Range(2574, 2599, 9), Range(2414, 2574, 14)
. That does not look right to me. The algorithm you state is correct, which I have not studied, has the following ranges for elements 17 - 19:(2414, 2574, 14), (2575, 2599, 9), (2600, 2816, 6)
, which is clearly different from what your code produced. The test data has as input[... (2414, 2574, 14), (2574, 2599, 9), ...]
. I would think that the value for key 2574 should be 9 if we apply the dictionary updates in order. This suggests to me that the known "correct" algorithm is not so correct and elements 17 - 19 should be(2414, 2573, 14), (2574, 2599, 9), (2600, 2816, 6)
.
To verify what I expected the correct results to be, I implemented code that computes the inefficient way: I allocate a list values
whose size if MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
where MAX_RANGE_NUMBER
would correspond to the maximum value of t[1]
where t
enumerates all the tuples in your test data and MIN_RANGE_NUMBER
is the minimum value of t[0]
. I initialize each element of the list to None
. I then proceed to iterate each tuple in the test data. If a tuple were, for example, (17, 27, 19)
I would assign values[17-MIN_RANGE_NUMBER:27+MIN_RANGE_NUMBER+1] = [19] * 11
. After all ranges have been thus processed it is fairly simple to process the values
list to come up with the non-overlapping ranges. I then print out the first 20 such ranges:
from typing import List, Tuple
def discretize_correct(ranges: List[Tuple]) -> List[Tuple]:
"""We are passed a list of tuples each representing a range
and we return a new list of range tuples."""
if not ranges: # Empty list
return []
results = []
MIN_RANGE_NUMBER = min(range[0] for range in ranges)
MAX_RANGE_NUMBER = max(range[1] for range in ranges)
range_size = MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
values = [None] * range_size
for range in ranges:
start, end, value = range
start -= MIN_RANGE_NUMBER
end -= MIN_RANGE_NUMBER
values[start:end+1] = [value] * (end + 1 - start)
results = []
start_idx, last_value = None, None
for idx, value in enumerate(values, start=MIN_RANGE_NUMBER):
if value is None:
if last_value is not None:
result.append((start_idx, idx - 1, last_value))
last_value = None
elif last_value is None:
start_idx = idx
last_value = value
elif value != last_value:
results.append((start_idx, idx - 1, last_value))
start_idx = idx
last_value = value
if last_value is not None:
results.append((start_idx, MAX_RANGE_NUMBER, last_value))
return results
if __name__ == '__main__':
from ast import literal_eval
with open('test.txt') as f:
data = literal_eval(f.read())
ranges = discretize_correct(data)
# Print out first 20 ranges:
for i in range(20):
print(f'{i}: {ranges[i]}')
0: (10, 181, 2)
1: (182, 706, 11)
2: (707, 920, 6)
3: (921, 1018, 7)
4: (1019, 1032, 6)
5: (1033, 1200, 2)
6: (1201, 1204, 6)
7: (1205, 1617, 0)
8: (1618, 1667, 6)
9: (1668, 1905, 11)
10: (1906, 1977, 6)
11: (1978, 2015, 2)
12: (2016, 2075, 6)
13: (2076, 2082, 3)
14: (2083, 2201, 6)
15: (2202, 2320, 10)
16: (2321, 2413, 6)
17: (2414, 2573, 14)
18: (2574, 2599, 9)
19: (2600, 2816, 6)
Conclusion
The above verifies that your code and the supposedly correct code posted here both produce results not only different from one another but also from what I believe are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct
function irrelevant, the OP's results still vary from those produced from what the OP says is correct code.
Consequently, should the OP's code now be moved to stackoverflow.com for correction before it is re-posted here?
- 3k
- 3
- 13