Return to Answer

Bounty Awarded with 50 reputation awarded by Ξένη Γήινος

occurred Jul 15, 2023 at 12:19

Correct code.

edited Jul 14, 2023 at 12:45

from typing import List, Tuple
def discretize_correct(ranges: List[Tuple]) -> List[Tuple]:
 """We are passed a list of tuples each representing a range
 and we return a new list of range tuples."""
 if not ranges: # Empty list
 return []
 results = []
 MIN_RANGE_NUMBER = min(range[0] for range in ranges)
 MAX_RANGE_NUMBER = max(range[1] for range in ranges)
 range_size = MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
 values = [None] * range_size
 for range in ranges:
 start, end, value = range
 start -= MIN_RANGE_NUMBER
 end -= MIN_RANGE_NUMBER
 values[start:end+1] = [value] * (end + 1 - start)
 results = []
 start_idx, last_value = None, None
 for idx, value in enumerate(values, start=MIN_RANGE_NUMBER):
 if value is None:
 if last_value is not None:
 resultresults.append((start_idx, idx - 1, last_value))
 last_value = None
 elif last_value is None:
 start_idx = idx
 last_value = value
 elif value != last_value:
 results.append((start_idx, idx - 1, last_value))
 start_idx = idx
 last_value = value
 if last_value is not None:
 results.append((start_idx, MAX_RANGE_NUMBER, last_value))
 return results
if __name__ == '__main__':
 from ast import literal_eval
 with open('test.txt') as f:
 data = literal_eval(f.read())
 ranges = discretize_correct(data)
 # Print out first 20 ranges:
 for i in range(20):
 print(f'{i}: {ranges[i]}')

from typing import List, Tuple
def discretize_correct(ranges: List[Tuple]) -> List[Tuple]:
 """We are passed a list of tuples each representing a range
 and we return a new list of range tuples."""
 if not ranges: # Empty list
 return []
 results = []
 MIN_RANGE_NUMBER = min(range[0] for range in ranges)
 MAX_RANGE_NUMBER = max(range[1] for range in ranges)
 range_size = MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
 values = [None] * range_size
 for range in ranges:
 start, end, value = range
 start -= MIN_RANGE_NUMBER
 end -= MIN_RANGE_NUMBER
 values[start:end+1] = [value] * (end + 1 - start)
 results = []
 start_idx, last_value = None, None
 for idx, value in enumerate(values, start=MIN_RANGE_NUMBER):
 if value is None:
 if last_value is not None:
 result.append((start_idx, idx - 1, last_value))
 last_value = None
 elif last_value is None:
 start_idx = idx
 last_value = value
 elif value != last_value:
 results.append((start_idx, idx - 1, last_value))
 start_idx = idx
 last_value = value
 if last_value is not None:
 results.append((start_idx, MAX_RANGE_NUMBER, last_value))
 return results
if __name__ == '__main__':
 from ast import literal_eval
 with open('test.txt') as f:
 data = literal_eval(f.read())
 ranges = discretize_correct(data)
 # Print out first 20 ranges:
 for i in range(20):
 print(f'{i}: {ranges[i]}')

from typing import List, Tuple
def discretize_correct(ranges: List[Tuple]) -> List[Tuple]:
 """We are passed a list of tuples each representing a range
 and we return a new list of range tuples."""
 if not ranges: # Empty list
 return []
 results = []
 MIN_RANGE_NUMBER = min(range[0] for range in ranges)
 MAX_RANGE_NUMBER = max(range[1] for range in ranges)
 range_size = MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
 values = [None] * range_size
 for range in ranges:
 start, end, value = range
 start -= MIN_RANGE_NUMBER
 end -= MIN_RANGE_NUMBER
 values[start:end+1] = [value] * (end + 1 - start)
 results = []
 start_idx, last_value = None, None
 for idx, value in enumerate(values, start=MIN_RANGE_NUMBER):
 if value is None:
 if last_value is not None:
 results.append((start_idx, idx - 1, last_value))
 last_value = None
 elif last_value is None:
 start_idx = idx
 last_value = value
 elif value != last_value:
 results.append((start_idx, idx - 1, last_value))
 start_idx = idx
 last_value = value
 if last_value is not None:
 results.append((start_idx, MAX_RANGE_NUMBER, last_value))
 return results
if __name__ == '__main__':
 from ast import literal_eval
 with open('test.txt') as f:
 data = literal_eval(f.read())
 ranges = discretize_correct(data)
 # Print out first 20 ranges:
 for i in range(20):
 print(f'{i}: {ranges[i]}')

Improve grammar.

Source Link

edited Jul 14, 2023 at 0:32

Booboo

edited Jul 14, 2023 at 0:32

Booboo

I have abandoned my implementation, which I agree does not work on the test case the OP posted. However, I do believe that the OP's code also produces erroneous results. So, first I am replicating here what had beenbelow comments I had made to my answer that highlight what I perceive to be the problem:

To verify what I expected the correct results to be, I implemented code that computes the ranges in an inefficient but hopefully correct way: I allocate a list values whose size if MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1 where MAX_RANGE_NUMBER would correspond to the maximum value of t[1] where t enumerates all the tuples in your test data and MIN_RANGE_NUMBER is the minimum value of t[0]. I initialize each element of the list to None. I then proceed to iterate each tuple in the test data. If a tuple were, for example, (17, 27, 19) I would assign values[17-MIN_RANGE_NUMBER:27+MIN_RANGE_NUMBER+1] = [19] * 11. After all ranges have been thus processed it is fairly simple to process the values list to come up with the non-overlapping ranges. I then print out the first 20 such ranges:

The above verifies that yourthe OP's code and the supposedly correct code posted here both produce results not only different from one another but also from what I believe are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct function irrelevant, the OP's results still vary from those produced from what the OP says is correct code.

To verify what I expected the correct results to be, I implemented code that computes the inefficient way: I allocate a list values whose size if MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1 where MAX_RANGE_NUMBER would correspond to the maximum value of t[1] where t enumerates all the tuples in your test data and MIN_RANGE_NUMBER is the minimum value of t[0]. I initialize each element of the list to None. I then proceed to iterate each tuple in the test data. If a tuple were, for example, (17, 27, 19) I would assign values[17-MIN_RANGE_NUMBER:27+MIN_RANGE_NUMBER+1] = [19] * 11. After all ranges have been thus processed it is fairly simple to process the values list to come up with the non-overlapping ranges. I then print out the first 20 such ranges:

The above verifies that your code and the supposedly correct code posted here both produce results not only different from one another but also from what I believe are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct function irrelevant, the OP's results still vary from those produced from what the OP says is correct code.

I have abandoned my implementation, which I agree does not work on the test case the OP posted. However, I do believe that the OP's code also produces erroneous results. So first I am replicating below comments I had made to my answer that highlight what I perceive to be the problem:

The above verifies that the OP's code and the supposedly correct code posted here both produce results not only different from one another but also from what I believe are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct function irrelevant, the OP's results still vary from those produced from what the OP says is correct code.

Added an update.

Source Link

edited Jul 13, 2023 at 20:44

Booboo

edited Jul 13, 2023 at 20:44

Booboo

It may not be much faster for all possible cases you mayUpdate

I have abandoned my implementation, but it is considerably faster forwhich I agree does not work on the input cases you showedtest case the OP posted. You will need to try this out on other inputsHowever, I do believe that you could expectthe OP's code also produces erroneous results. In any case usingSo, first I am replicating here what had been comments I made to my answer:

I have been trying to modify my code so that I get the same results with the test data you posted here . I printed out the first few ranges in your results and ranges 17 through 19 (origin 0) are Range(2414, 2573, 14), Range(2574, 2599, 9), Range(2414, 2574, 14). That does not look right to me. The algorithm you state is correct, which I have not studied, has the following ranges for elements 17 - 19: (2414, 2574, 14), (2575, 2599, 9), (2600, 2816, 6), which is clearly different from what your code produced. The test data has as input [... (2414, 2574, 14), (2574, 2599, 9), ...]. I would think that the value for key 2574 should be 9 if we apply the dictionary updates in order. This suggests to me that the known "correct" algorithm is not so correct and elements 17 - 19 should be (2414, 2573, 14), (2574, 2599, 9), (2600, 2816, 6).

To verify what I expected the correct results to be, I implemented code that computes the inefficient way: I allocate a list heapqvalues is easier to understandwhose size if (and thusMAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1 where MAX_RANGE_NUMBER would correspond to maintain). The new function is calledthe maximum value of discretize2t[1] inwhere t enumerates all the code belowtuples in your test data and returns a listMIN_RANGE_NUMBER is the minimum value of range tuplest[0]. It also has the advantage thatI initialize each element of the input does not havelist to be sorted (if you have unsorted input,None. I then un-comment out the callproceed to iterate each tuple in the test data. If a tuple were, for example, heapq.heapify(17, 27, 19)). I leavewould assign values[17-MIN_RANGE_NUMBER:27+MIN_RANGE_NUMBER+1] = [19] * 11. After all ranges have been thus processed it to you as ais fairly simple exercise to modifyprocess the code if you require a list of Rangevalues instanceslist to come up with the non-overlapping ranges. I then print out the first 20 such ranges:

from collections import deque
class Range:
 processed = deque()
 stack = deque()
 def __init__(self, ini, fin, data) -> None:
 self.ini = ini
 self.fin = fin
 self.data = data
 def __repr__(self):
 return f'Range({self.ini}, {self.fin}, {self.data})'
 def cover(self, fin):
 if fin <= self.fin:
 return True
 def join(self, ini, fin):
 if ini <= self.fin + 1:
 self.fin = fin
 return True
 def prepend(self, ini):
 if self.ini < ini:
 Range.processed.append(Range(self.ini, ini - 1, self.data))
 def climb(self, fin):
 if self.ini < (ini := fin + 1) <= self.fin:
 self.ini = ini
 def update(self, ini, fin, data):
 if self.ini == ini and self.fin == fin:
 self.data = data
 return True
 def process(self, ini, fin, data):
 ignore = past = False
 if self.data == data:
 ignore = self.cover(fin) or self.join(ini, fin)
 elif ini <= self.fin and not (ignore := self.update(ini, fin, data)):
 self.prepend(ini)
 if not ignore:
 if ini > self.fin:
 Range.processed.append(self)
 Range.stack.pop()
 past = True
 self.climb(fin)
 return not ignore, past
def discretize(ranges):
 Range.processed.clear()
 Range.stack.clear()
 for ini, fin, data in ranges:
 if not Range.stack:
 Range.stack.append(Range(ini, fin, data))
 else:
 append, past = Range.stack[-1].process(ini, fin, data)
 while past and Range.stack:
 _, past = Range.stack[-1].process(ini, fin, data)
 if append:
 Range.stack.append(Range(ini, fin, data))
 Range.processed.extend(reversed(Range.stack))
 return Range.processed
##################################################################################
from typing import List, Tuple
import heapq
def discretize2discretize_correct(dataranges: List[Tuple]) -> List[Tuple]:
 """We are passed a list of tuples each representing a range
 and we return a new list of range tuples."""
 if not dataranges: # Empty list
 return []
 results = []
 # Make copy of passed data, which we do not want to destroy:
 rangesMIN_RANGE_NUMBER = data.copymin()
 # Call to heapq.heapify only neededrange[0] for unsorted input:
  range in #heapq.heapify(ranges) # Turn into a heap

 while ranges:
  range1MAX_RANGE_NUMBER = heapq.heappopmax(ranges) # Get minimum "range"
 if not ranges:range[1] #for Norange morein ranges
 results.append(range1) # Add to results
 range_size = returnMAX_RANGE_NUMBER results- #MIN_RANGE_NUMBER And+ return
1
 range2values = ranges[0] # Get new[None] minimum* rangerange_size
 iffor range1[1]range <in range2[0]ranges:
 start, end, value = results.append(range1)range
 else:
 start -= MIN_RANGE_NUMBER
 results.append((range1[0], range2[0]end - 1,= range1[2]))MIN_RANGE_NUMBER
 values[start:end+1] = [value] heapq.heappush(ranges,* (range2[1]end + 1, range1[1], range1[2]))
if __name__ == '__main__':
 import timeit
 def compare_results(data: str, results1: List[Range], results2: List[Tuple]) -> None:
  print(start)
 print(f"Comparing Results Using {data} as Input:")
 l1results = len(results1)[]
 start_idx, l2last_value = len(results2)
  if l1 !=None, l2:None
 for idx, value raisein Exceptionenumerate('Results have differentvalues, lengths.'start=MIN_RANGE_NUMBER):
 forif ivalue inis range(l1)None:
 if (results1[i].ini, results1[i].fin,last_value results1[i].data)is !=not results2[i]None:
 raise Exception('Different resultsresult.')
 print('Passed!')
 data1 = [
 (3, 2789, 2681),
 append(27, 2349, 2004),
 (29, 1790, 2072)start_idx, idx (32,- 15071, 1173last_value),
 (35, 768, 3453),
 (63, 222, 2108),
 (341,last_value 501,= 2938),None
 (378, 444, 258),
  elif (870,last_value 888,is 1364),None:
 (1684, 1693, 880)
 ]
 data2start_idx = [(2518812, 3931503981, 1222922854),idx
 (8283077, 3890282434, 2360189310),
 (21368257,last_value 3571587114,= 2651809994),value
 (24571285, 2875098555, 1506778869),
  elif (40433776,value 1751972813,!= 4162881198),last_value:
 (60822897, 1413187009, 921758666),
 results.append(73435921, 1245663774, 3798627995),
 (169616863, 530686872, 1316302364)start_idx, idx (273065748,- 4323632051, 3428529648last_value),
 (291266380, 409571968, 4206669862),
 (558988640, 574048639, 4272236023),
 (619240775,start_idx 631101725,= 1910576108),idx
 (727687992, 756241696, 3095538347),
 (825700858,last_value 866696475,= 2647033502),value
 (853058882, 854026525, 2456782176),
 (967641241, 1001077664, 3556790281)
if last_value is not ]
None:
 data3 = [(0, 10, 'A'), (100, 200, 'B'), (150, 180, 'C')]
 print('discretize(data1):', timeitresults.timeitappend(stmt="discretize(data1)"start_idx, number=100_000MAX_RANGE_NUMBER, globals=globals()last_value))
 print('discretize2(data1):', timeit.timeit(stmt="discretize2(data1)", number=100_000,return globals=globals()))results
 print()
if __name__ == print('discretize(data2)'__main__':', timeit.timeit(stmt="discretize(data2)", number=100_000, globals=globals()))
 print('discretize2(data2):',from timeit.timeit(stmt="discretize2(data2)",ast number=100_000,import globals=globals()))literal_eval
 print()
 print('discretize(data3):', timeit.timeit(stmt="discretize(data3)", number=100_000, globals=globals()))
 with print('discretize2open(data3):', timeit'test.timeit(stmt="discretize2(data3)", number=100_000, globals=globals())txt') as compare_results('data1',f:
 data = discretizeliteral_eval(data1),
 discretize2f.read(data1))
 compare_results('data2',
 discretize(data2),
 ranges = discretize2discretize_correct(data2data)
 # )
Print out first 20 compare_results('data3',ranges:
 for i in discretizerange(data320),:
 discretize2print(data3)
 f'{i}: {ranges[i]}')

discretize(data1)0: (10, 181, 2.0479232)
discretize21: (data1182, 706, 11)
2: 0.9702454
discretize(data2707, 920, 6)
3: 3.2891357999999995
discretize2(data2921, 1018, 7)
4: 1.5896913000000001
discretize(data31019, 1032, 6)
5: 0.36877010000000077(1033, 1200, 2)
discretize26: (data31201, 1204, 6)
7: (1205, 1617, 0.13420890000000085
)
Comparing8: Results(1618, Using1667, data16)
9: as(1668, Input:
Passed!1905, 11)
10: (1906, 1977, 6)
Comparing11: Results(1978, Using2015, data22)
12: as(2016, Input2075, 6)
13: (2076, 2082, 3)
Passed!14: (2083, 2201, 6)
15: (2202, 2320, 10)
Comparing16: Results(2321, Using2413, data36)
17: as(2414, Input2573, 14)
18: (2574, 2599, 9)
Passed!19: (2600, 2816, 6)

If you stick withConclusion

The above verifies that your version,code and the supposedly correct code posted here both produce results not only different from one another but also from what I would suggest that you add a docstringbelieve are truly the correct results. Even if I have misunderstood the problem definition rendering the above results from my discretize_correct function irrelevant, type hints and commentsthe OP's results still vary from those produced from what the OP says is correct code.

Consequently, should the OP's code now be moved to stackoverflow.com for correction before it is re-posted here?

It may not be much faster for all possible cases you may have, but it is considerably faster for the input cases you showed. You will need to try this out on other inputs that you could expect. In any case using a heapq is easier to understand (and thus to maintain). The new function is called discretize2 in the code below and returns a list of range tuples. It also has the advantage that the input does not have to be sorted (if you have unsorted input, then un-comment out the call to heapq.heapify). I leave it to you as a simple exercise to modify the code if you require a list of Range instances:

from collections import deque
class Range:
 processed = deque()
 stack = deque()
 def __init__(self, ini, fin, data) -> None:
 self.ini = ini
 self.fin = fin
 self.data = data
 def __repr__(self):
 return f'Range({self.ini}, {self.fin}, {self.data})'
 def cover(self, fin):
 if fin <= self.fin:
 return True
 def join(self, ini, fin):
 if ini <= self.fin + 1:
 self.fin = fin
 return True
 def prepend(self, ini):
 if self.ini < ini:
 Range.processed.append(Range(self.ini, ini - 1, self.data))
 def climb(self, fin):
 if self.ini < (ini := fin + 1) <= self.fin:
 self.ini = ini
 def update(self, ini, fin, data):
 if self.ini == ini and self.fin == fin:
 self.data = data
 return True
 def process(self, ini, fin, data):
 ignore = past = False
 if self.data == data:
 ignore = self.cover(fin) or self.join(ini, fin)
 elif ini <= self.fin and not (ignore := self.update(ini, fin, data)):
 self.prepend(ini)
 if not ignore:
 if ini > self.fin:
 Range.processed.append(self)
 Range.stack.pop()
 past = True
 self.climb(fin)
 return not ignore, past
def discretize(ranges):
 Range.processed.clear()
 Range.stack.clear()
 for ini, fin, data in ranges:
 if not Range.stack:
 Range.stack.append(Range(ini, fin, data))
 else:
 append, past = Range.stack[-1].process(ini, fin, data)
 while past and Range.stack:
 _, past = Range.stack[-1].process(ini, fin, data)
 if append:
 Range.stack.append(Range(ini, fin, data))
 Range.processed.extend(reversed(Range.stack))
 return Range.processed
##################################################################################
from typing import List, Tuple
import heapq
def discretize2(data: List[Tuple]) -> List[Tuple]:
 """We are passed a list of tuples each representing a range
 and we return a new list of range tuples."""
 if not data: # Empty list
 return []
 results = []
 # Make copy of passed data, which we do not want to destroy:
 ranges = data.copy()
 # Call to heapq.heapify only needed for unsorted input:
  #heapq.heapify(ranges) # Turn into a heap

 while ranges:
  range1 = heapq.heappop(ranges) # Get minimum "range"
 if not ranges: # No more ranges
 results.append(range1) # Add to results
 return results # And return

 range2 = ranges[0] # Get new minimum range
 if range1[1] < range2[0]:
 results.append(range1)
 else:
  results.append((range1[0], range2[0] - 1, range1[2]))
 heapq.heappush(ranges, (range2[1] + 1, range1[1], range1[2]))
if __name__ == '__main__':
 import timeit
 def compare_results(data: str, results1: List[Range], results2: List[Tuple]) -> None:
  print()
 print(f"Comparing Results Using {data} as Input:")
 l1 = len(results1)
 l2 = len(results2)
  if l1 != l2:
 raise Exception('Results have different lengths.')
 for i in range(l1):
 if (results1[i].ini, results1[i].fin, results1[i].data) != results2[i]:
 raise Exception('Different results.')
 print('Passed!')
 data1 = [
 (3, 2789, 2681),
 (27, 2349, 2004),
 (29, 1790, 2072), (32, 1507, 1173),
 (35, 768, 3453),
 (63, 222, 2108),
 (341, 501, 2938),
 (378, 444, 258),
  (870, 888, 1364),
 (1684, 1693, 880)
 ]
 data2 = [(2518812, 3931503981, 1222922854),
 (8283077, 3890282434, 2360189310),
 (21368257, 3571587114, 2651809994),
 (24571285, 2875098555, 1506778869),
  (40433776, 1751972813, 4162881198),
 (60822897, 1413187009, 921758666),
 (73435921, 1245663774, 3798627995),
 (169616863, 530686872, 1316302364), (273065748, 432363205, 3428529648),
 (291266380, 409571968, 4206669862),
 (558988640, 574048639, 4272236023),
 (619240775, 631101725, 1910576108),
 (727687992, 756241696, 3095538347),
 (825700858, 866696475, 2647033502),
 (853058882, 854026525, 2456782176),
 (967641241, 1001077664, 3556790281)
 ]

 data3 = [(0, 10, 'A'), (100, 200, 'B'), (150, 180, 'C')]
 print('discretize(data1):', timeit.timeit(stmt="discretize(data1)", number=100_000, globals=globals()))
 print('discretize2(data1):', timeit.timeit(stmt="discretize2(data1)", number=100_000, globals=globals()))
 print()
 print('discretize(data2):', timeit.timeit(stmt="discretize(data2)", number=100_000, globals=globals()))
 print('discretize2(data2):', timeit.timeit(stmt="discretize2(data2)", number=100_000, globals=globals()))
 print()
 print('discretize(data3):', timeit.timeit(stmt="discretize(data3)", number=100_000, globals=globals()))
  print('discretize2(data3):', timeit.timeit(stmt="discretize2(data3)", number=100_000, globals=globals())) compare_results('data1',
 discretize(data1),
 discretize2(data1))
 compare_results('data2',
 discretize(data2),
 discretize2(data2)
 )
 compare_results('data3',
 discretize(data3),
 discretize2(data3)
  )

discretize(data1): 2.0479232
discretize2(data1): 0.9702454
discretize(data2): 3.2891357999999995
discretize2(data2): 1.5896913000000001
discretize(data3): 0.36877010000000077
discretize2(data3): 0.13420890000000085

Comparing Results Using data1 as Input:
Passed!
Comparing Results Using data2 as Input:
Passed!
Comparing Results Using data3 as Input:
Passed!

If you stick with your version, I would suggest that you add a docstring, type hints and comments.

Update

I have been trying to modify my code so that I get the same results with the test data you posted here . I printed out the first few ranges in your results and ranges 17 through 19 (origin 0) are Range(2414, 2573, 14), Range(2574, 2599, 9), Range(2414, 2574, 14). That does not look right to me. The algorithm you state is correct, which I have not studied, has the following ranges for elements 17 - 19: (2414, 2574, 14), (2575, 2599, 9), (2600, 2816, 6), which is clearly different from what your code produced. The test data has as input [... (2414, 2574, 14), (2574, 2599, 9), ...]. I would think that the value for key 2574 should be 9 if we apply the dictionary updates in order. This suggests to me that the known "correct" algorithm is not so correct and elements 17 - 19 should be (2414, 2573, 14), (2574, 2599, 9), (2600, 2816, 6).

from typing import List, Tuple
def discretize_correct(ranges: List[Tuple]) -> List[Tuple]:
 """We are passed a list of tuples each representing a range
 and we return a new list of range tuples."""
 if not ranges: # Empty list
 return []
 results = []
 MIN_RANGE_NUMBER = min(range[0] for range in ranges)
 MAX_RANGE_NUMBER = max(range[1] for range in ranges)
 range_size = MAX_RANGE_NUMBER - MIN_RANGE_NUMBER + 1
 values = [None] * range_size
 for range in ranges:
 start, end, value = range
 start -= MIN_RANGE_NUMBER
 end -= MIN_RANGE_NUMBER
 values[start:end+1] = [value] * (end + 1 - start)
 results = []
 start_idx, last_value = None, None
 for idx, value in enumerate(values, start=MIN_RANGE_NUMBER):
 if value is None:
 if last_value is not None:
 result.append((start_idx, idx - 1, last_value))
 last_value = None
 elif last_value is None:
 start_idx = idx
 last_value = value
 elif value != last_value:
 results.append((start_idx, idx - 1, last_value))
 start_idx = idx
 last_value = value
 if last_value is not None:
 results.append((start_idx, MAX_RANGE_NUMBER, last_value))
 return results
if __name__ == '__main__':
 from ast import literal_eval
 with open('test.txt') as f:
 data = literal_eval(f.read())
 ranges = discretize_correct(data)
 # Print out first 20 ranges:
 for i in range(20):
 print(f'{i}: {ranges[i]}')

0: (10, 181, 2)
1: (182, 706, 11)
2: (707, 920, 6)
3: (921, 1018, 7)
4: (1019, 1032, 6)
5: (1033, 1200, 2)
6: (1201, 1204, 6)
7: (1205, 1617, 0)
8: (1618, 1667, 6)
9: (1668, 1905, 11)
10: (1906, 1977, 6)
11: (1978, 2015, 2)
12: (2016, 2075, 6)
13: (2076, 2082, 3)
14: (2083, 2201, 6)
15: (2202, 2320, 10)
16: (2321, 2413, 6)
17: (2414, 2573, 14)
18: (2574, 2599, 9)
19: (2600, 2816, 6)