Python Float point range() operator implementation

Question 1

I recently wrote a full implementation of a range() operator with floating point values in Python, and I'm not sure if I've followed the standard pythonic patterns. From what I can tell, it works. I've written a few test cases, but they don't cover everything yet. I've also published this in a gist on github as well

This was written in Python 3.6.4, though I think it should work in most 3.x flavors. Here's FloatRange.py

# -*- coding: utf-8 -*-
from numbers import Real
class FloatRange:
 """
 step/precision used as the tolerance in __contains__ method. Defaults to 1/(10**10).
 This is used to attempt to mitigate some of the floating point rounding error, but scales with the step size
 so that smaller step sizes are not susceptible to catching more than they ought. 
 Basically, there's a 2 in <precision> chance that a number will randomly 
 """
 _precision = 10**10
 _start, _step = 0, 1 # stop must be defined by user. No use putting it in here.
 """
 Here we set up our constraints as the user passes their values:
 - You can pass the values as follows:
 - stop (start defaults to 0, step defaults to 1)
 - start, stop (step again defaults to 1)
 - start, stop, step
 - start, step, and stop must be Real numbers
 - The step must be non-zero
 - The step must be in the same direction as the difference between stop and start
 """
 def __init__(self, *args, **kwargs):
 self.start = self._start
 self.step = self._step
 self.precision = self._precision
 self.counter = 0
 if len(args) == 1:
 (self.stop, ) = args # FloatRange 
 elif len(args) == 2:
 (self.start, self.stop) = args
 elif len(args) == 3:
 (self.start, self.stop, self.step) = args
 else:
 raise TypeError("FloatRange accepts 1, 2, or 3 arguments. ({0} given)".format(len(args)))
 for num in self.start, self.step, self.stop:
 if not isinstance(num, Real):
 raise TypeError("FloatRange only accepts Real number arguments. ({0} : {1} given)".format(type(num), str(num)))
 if self.step == 0:
 raise ValueError("FloatRange step cannot be 0")
 if (self.stop-self.start)/self.step < 0:
 raise ValueError("FloatRange value must be in the same direction as the start->stop")
 self.set_precision(self._precision) # x in FloatRange will return True for values within 0.1% of step size 
 if len(kwargs) > 0:
 for key, value in kwargs.items():
 if key == "precision":
 if value < 0:
 raise ValueError("FloatRange precision must be positive")
 self.tolerance = self.step/self.precision
 else:
 raise ValueError("Unknown kwargs key: {0}".format(key))
 """
 Returns the next value in the iterator, or it stops the iteration and resets.
 """
 def __next__(self):
 output = self.start + (self.counter * self.step)
 if ((self.step > 0 and output >= self.stop) or
 (self.step < 0 and output <= self.stop)) :
 self.counter = 0
 raise StopIteration
 self.counter += 1
 return output
 """
 The class already implements __next__(), so it is its own iterable
 """
 def __iter__(self):
 return self
 def set_precision(self, precision=None):
 if precision is None:
 self.precision = self._precision
 elif isinstance(precision, Real):
 if precision < 0:
 raise ValueError("FloatRange precision cannot be a negative number.")
 self.precision = precision
 else:
 raise ValueError("FloatRange precision must be a Real number.")
 self.tolerance = abs(self.step/self.precision)
 """
 len(my_FloatRange)
 """
 def __len__(self):
 # we have to do this minute addition here so that floating point rounding does not fail. 
 return int((self.stop - self.start + self.step/10**13) / self.step)
 """
 x in my_FloatRange
 Evaluates whether a given number is contained in the range, in constant time. 
 Non-exact values will return True if they are within the provided tolerance.
 Use set_precision(precision) to define the precision:step ratio (the tolerance)
 """
 def __contains__(self, item):
 diff = (item - self.start) % self.step
 # if we're dealing with exact cases (not recommended, but okay.)
 if (self.step > 0 and 
 item >= self.start-self.tolerance and 
 item < self.stop):
 return (min(diff, self.step-diff) < self.tolerance)
 elif (self.step < 0 and 
 item <= self.start+self.tolerance and 
 item > self.stop
 ):
 return (min(abs(diff), abs(self.step-diff)) < self.tolerance)
 return False
 def __str__(self):
 return self.__repr__()
 def __repr__(self):
 ext = ""
 if not self.step == 1:
 ext += ", {0}".format(self.step)
 if self.precision != self._precision:
 ext += ", precision={0}, tolerance={1}".format(
 self.precision, self.tolerance
 )
 return "FloatRange({0}, {1}{2})".format(
 self.start,
 self.stop,
 ext
 )

Here are my current preliminary test casestest_FloatRange.py

# -*- coding: utf-8 -*-
import unittest
from FloatRange import FloatRange
class TestCase_FloatRange(unittest.TestCase):
 def test_compare_basic(self, start=None, stop=1, step=None, verbose=False):
 my_range = None
 my_FloatRange = None
 if step is None:
 if start is None:
 my_range = range(stop)
 my_FloatRange = FloatRange(stop)
 else:
 my_range = range(start, stop)
 my_FloatRange = FloatRange(start, stop)
 else:
 my_range = range(start, stop, step)
 my_FloatRange = FloatRange(start, stop, step)
 if verbose:
 print("Validating:[{0}] == [{1}]".format(
 my_range, my_FloatRange))
 for x,y in zip(my_range, my_FloatRange):
 try:
 self.assertEqual(x,y)
 except:
 print("{0} and {1} failed to produce the same values.".format(
 my_range, my_FloatRange
 ))
 raise
 def test_compare_range_functionality(self):
 _length = 10 # arbitrary number for adequate length
 _step = 2
 _start = 5
 self.test_compare_basic(stop = _length)
 self.test_compare_basic(start =_start,
 stop = _length)
 self.test_compare_basic(start=_start,
 stop= _start+_length)
 self.test_compare_basic(start=_start,
 stop= _start+_length*_step,
 step= _step)
 def test_correct_length(self):
 for _divisor in range(1, 100):
 for _step_base in range(1, 100):
 for _length in range(1, 100):
 _step = _step_base / _divisor
 _start = 1 / _divisor + 1
 _stop = _start + _length*_step
 my_FloatRange = FloatRange(_start,
 _stop,
 _step)
 try:
 self.assertEqual(len(my_FloatRange), _length)
 except Exception:
 print("Length test failed with parameters:\n\tstart:{0}\n\tstop :{1}\n\tstep: {2}\n\tvalue: {2}".format(
 _start, _stop, _step, len(my_FloatRange)
 ))
 raise
 def test_value_set(self, subject=FloatRange(1), values=[0], verbose=False):
 if verbose:
 print("Validating {0} produces {1}".format(subject, values))
 try:
 self.assertEqual(len(subject), len(values))
 except:
 print("{0} and {1} do not have the same length!".format(subject, values))
 raise
 for f, v in zip(subject, values):
 try:
 self.assertAlmostEqual(f, v) # floating point rounding doesn't allow for exact equality.
 except:
 print("{0} does not produce {1}".format(subject, values))
 raise
 def test_values(self):
 self.test_value_set(FloatRange(0, 10, 1/3), [(x/3) for x in range(30)])
 self.test_value_set(FloatRange(5, 15, 1/3), [(5+(x/3)) for x in range(30)])
 self.test_value_set(FloatRange(1, 11, 1/7), [(1+(x/7)) for x in range(70)])
 self.test_value_set(FloatRange(8, 18, 1/7), [(8+(x/7)) for x in range(70)])
if __name__ == '__main__':
 unittest.main()

My initial test cases pass, and it seems to work alright. Is there anything else that I should be doing?

Question 2

Why do you use _start and _step names instead of just start and step for class attributes?

Question 3

I’m surprised by the "(stop - start) and step must have the same sign" condition. list(range(0, 10, -1)) is the empty list, not an error.

Question 4

hmmm... I suppose that could be fixed with a similar condition in the next implementation

Question 5

Let's start with comments about the code you've submitted, before we discuss some more important overlying concepts and design decisions.

Good

Docstrings for each method (some could be more helpful though, like __len__)
Some comments for perhaps unclear lines
Python 3 style classes (didn't inherit from object)
You have unit tests!
Good use of ValueError

Improvements

You don't need # -*- coding: utf-8 -*- with Python 3
You formatting is pretty inconsistent. Try PEP8 (it's the standard formatting that most projects adhere to)
You seem to prefix a lot of variables with _. It seems like you may be confused about kwargs. If you do foo(bar=1), bar is not a variable. So if you had bar = 1, it's perfectly legal (and encouraged) to do foo(bar=bar). Although, consider if it really is unclear what the param means. Perhaps a positional arg works just fine. If that's not the case, we basically exclusively use _ for private instance properties (like self._start)
Your test_compare_basic isn't actually a test case. Methods starting with test_ should exercise a specific test case or group of test cases. test_compare_basic is actually a generic way of testing any range. Writing it was a fantastic idea, because it makes writing the later tests much more succinct and clear. However, naming it test_ means that it is run by the test harness (and it shouldn't be run alone). I usually call these functions assert* to match the unittest framework (camelCase unfortunately, as this is what unittest does, but you could break this if you wanted). Eg. name it assertFloatRangeCorrect. Then your tests look like:

 def test_simple_float_ranges(self):
 # These reads much more like sentences now...
 self.assertFloatRangeCorrect(0.5, 5.0, 0.5)
 self.assertFloatRangeCorrect(1, 2, 0.25)

I see you have try/except in your tests to print a message. You shouldn't be doing this. For one, the message won't be grouped with the error (or it's stack trace). You can just pass the extra optional msg argument to assertEqual: self.assertEqual(len(actual), len(expected), f'len({actual}) != len({expected})') (notice my use of f-strings, they're definitely cleaner here). By doing this, your tests become a lot shorter and you avoid the try/except/raise dance.
For testing exact equality, instead of ziping two iterables just use self.assertEqual(iterable_a, iterable_b). This will also produce a nice error message automatically.
Your check against Real is strange (more on this later)
What is going on with the _precision, _start, and _step? You shouldn't have those.
Don't use *args like this in __init__. Use arg defaults. Eg. def __init__(self, start=0, stop=1, step=1) (I know this doesn't work perfectly with your current argument scheme, but later I'll argue you should change it)
In tuple unpacking ((self.stop, ) = args) you don't need the parens'
Your __iter__ docstring should be a comment. It doesn't explain to a user of FloatRange how to use the class. But you can eliminate it, because that's obvious from the fact you return self.
Having the range be it's own iterator is strange (and uncommon). And I'll argue against it later.
Minor nit but in __str__ use repr(self). We usually don't call dunder methods (with the prominent exception being super().__init__(...)).
In __repr__ use f-strings. They much easier to construct and they give a better idea of the output format.
You should put FloatRange in float_range.py instead of FloatRange.py
Comparing floats with 0 is usually not what you want. Rarely will the result of arithmetic be exactly 0. You want math.isclose

Now, let's talk about the big concept here. Python's builtin range doesn't support floats as you likely know. There is good reason for this. Floating point math does not always work as pen and paper decimal math due to representation issues. A similar problem would be adding 1/3 as a decimal by hand 3 times. You expect 1, but since you only have a finite number of decimals, it won't be exactly 1 (it'll be 0.99...).

What does this have to do with your float range? It poses two interesting problems for the user of FloatRange if they're used to range().

The upper bound may not produce the range that you expect due to representation errors alluded to above. Where we can know that range(5) will always have 5 numbers, we can't really be so sure about the length of range(0, 10, 0.1) (that is, unless the start, stop, and step are exactly 0, 10, and 0,1--floats are deterministic given the same operations in the same order) because of floating point inaccuracies. Sure, we can divide like you did. However, with your precision factor, I suspect that length won't always be right. The trouble here is we need to decide what stop means. For range, it's much easier to say because integers are exact. range can be thought of as filling in the number line between start and stop (excluding stop). We probably want to exclude stop too for consistency, but FloatRange is more of a finite set of points between start and stop exclusive. Because of this, membership is a little more tricky. You could define membership as being within the range or being an explicit member from the iteration of the range. For range(), these two are equivalent because integers are countable.

You seem to have chosen the later definition of __contains__. But, it does beg the question: is this actually meaningful? Is there a context where you'd need to check floating point within a tolerance of some number of (finite-representable) discrete points in some range.

As a result of these issues, this FloatRange is way more complicated than it needs to be. You also make some common mistakes with comparing floating point numbers that will fail for extrema.

As an aside, let's also take a look at your constructor parameters. You allow for 1, 2, or 3 arguments (excluding precision), like range. I think the only really meaningful constructors are the 2 and 3 argument ones. The single argument assumes a start of 0 and step of 1. But, then this is precisely just range (so why not use range?). It seems like it's really only meaningful to define a floating point stop and step (with a start of 0) or all 3. But, if you really feel strongly about the 1 argument case, you can of course keep it.

Now that we've discussed the problems, let's take a stab at some solutions. I see two solutions.

You want a range starting at start that adds step until the number is >= stop. This is more like what you've implemented (and similar to range in some regards, except its length is not constant-time computable). I'd recommend not defining __len__. If you do, you may want to warn that it is not constant time. Why is this? Well you could do (stop - start) / step, but as you likely found, this has accuracy issues. These are the same representation issues we mentioned above. Furthermore, it is difficult to account for fancier bounds checking (ie. if you want to keep producing numbers until one is less than or "close to" stop for some definition of close to--like within some threshold).

from itertools import count, takewhile
class FloatRange:
 def __init__(self, start, stop=None, step=1):
 # No to handle # of arguments manually
 if stop is None:
 stop = start
 start = 0
 if any(not isinstance(x, float) for x in (start, stop, step)):
 raise ValueError('start, stop, step must be floats')
 if (start < stop and step < 0) or (start > stop and step > 0):
 raise ValueError('step sign must match (stop - start)')
 self.start = start
 self.stop = stop
 self.step = step
 def __iter__(self):
 return takewhile(lambda x: x < self.stop, (self.start + i * self.step
 for i in count(0)))

Note we need no custom iterator or lots of logic. itertools can do most of the heavy lifting. Furthermore, we can update the predicate lambda x: to also include some definition of less than or close to like so: lambda x: x < self.stop and not math.isclose(x, self.stop, ...). Look at math.isclose to see what you need to pass (you need two params, not just tolerance). If you really need __len__:

def __len__(self):
 count = 0
 for x in self:
 count += 1
 return count

I'd recommend against __contains__ here because determining the index count have precision issues for extrema. Eg. self.step * round((x - self.start) / self.step) could be unstable.

You want a range that takes some pre-determinted number of steps of size step from start. Notice there is no stop here. __len__ is immediately obvious. I'd recommend maybe not defining __contains__ for now.

This case is very straightfoward:

class FloatRange:
 def __init__(self, start, *, step=1, steps=0): # here I require step and steps to be kwargs for clarity
 if any(not isinstance(x, float) for x in (start, step)):
 raise ValueError('start and step must be floats')
 if not isinstance(steps, int) or x < 0:
 raise ValueError('steps must be a positive integer')
 self.start = start
 self.step = step
 self.steps = steps
 def __iter__(self):
 return (self[i] for i in range(self.steps))
 def __getitem__(self, i):
 if not 0 <= i < self.steps:
 raise IndexError('FloatRange index out of range')
 return self.start + i * self.step
 def __len__(self):
 return self.steps

Here we can easily define __len__. __contains__ is still tricky, because determining the index of a potential member of the range could be unstable. Here, though, because we can compute the end of the range in constant time (it's exactly start + steps * step), we can do some sort of clever binary search. More specifically, we can search for numbers close to the desired numbers (for some metric of closeness that you determine) and stop once the numbers we find are less than the desired number and decreasing (negative step) OR greater than the desired number and increasing (positive step). This comes nearly for free because we were able to define __getitem__ (which we couldn't before because we couldn't bound the indices). We note that in this way, this FloatRange behaves much more like range() even though the constructor parameters are different.

You may argue that since steps must be an integer, if you placed some sane limits on it then it would be impossible to construct a member whose index calculation is unstable. Unfortunately, because the index calculation involves a multiply/divide this is just not the case. By reading the IEEE 754 spec you can construct a degenerate case. Specifically, for large indices (which would initially be a float when resulting from the index computation) the floating point resolution is so wide that converting to an int does not produce the correct index. This is especially true for Python because int is arbitrary precision.

Question 6

Wow, that's quite detailed. Thank you for your time... I'm gonna have to read through this a few times to pick it apart. You mentioned that my comparison against Real was strange... could you expand on that? Obviously, I thought it was a reasonable comparison.

Question 7

You may want to test isinstance(x, (float, int)) to allow calls like FloatRange(3, 9, .1) without error.

Question 8

Also, as regard to your discussion on the number of arguments, I find the use-case of a single argument perfectly valid and not necessarily replaceable by range. Think of FloatRange(9.3) for instance.

Question 9

@DavidCulbreth For one, isinstance is used pretty sparingly. Although, it's definitely appropriate in this context. Checking against Real is appropriate depending on the kind of numbers you'd want to accept. Perhaps in this context it's appropriate, but that depends. @MathiasEttinger has a good point that you should allow both floats and ints.

Question 10

@MathiasEttinger Yes, I had considered that case. I see your point, but that's equivalent to map(float, range(ceil(x)) or preferably range(ceil(x)). The latter gives you more operations (since range is more featured with better performance). Although, in my sample implementations I did support this 1-arg constructor.

Question 11

You probably should replace **kwargs in __init__ with precision=10**10:

def __init__(self, *args, precision=10**10):
 self.precision = precision
 self.set_precision(precision) # not self._precision

and remove last huge block of __init__ that validates kwargs

The way you implement __iter__ has following disadvantage:

r = FloatRange(1., 2., 0.3)
iterator1 = iter(r)
iterator2 = iter(r)
assert next(iterator1) == 1.0
assert next(iterator2) == 1.3
iterator3 = iter(r)
assert next(iterator3) == 1.6

If you run the same code with built-in python range iterator2 and iterator3 will produce original sequence, not empty. You probably should remove __next__ method and return generator in __iter__:

def __iter__(self):
 output = self.start
 while (self.step > 0) == (output < self.stop) and output != self.stop:
 yield output
 output += self.step

Question 12

What version of Python are you using? when I ran your code, it failed on assert list(iterator2) == [] --- AssertionError That is where it is supposed to fail, yes?

Question 13

It's my mistake. I wanted to point out that consuming one iterators have side effect on other iterator because all iterators are the same object. The assertion error happen because consuming iterator1 resets its state so it is possible to iterate with it again. Updated the answer

score 4 · Accepted Answer · 2019-01-22 10:05:54Z

Let's start with comments about the code you've submitted, before we discuss some more important overlying concepts and design decisions.

Good

Docstrings for each method (some could be more helpful though, like __len__)
Some comments for perhaps unclear lines
Python 3 style classes (didn't inherit from object)
You have unit tests!
Good use of ValueError

Improvements

You don't need # -*- coding: utf-8 -*- with Python 3
You formatting is pretty inconsistent. Try PEP8 (it's the standard formatting that most projects adhere to)
You seem to prefix a lot of variables with _. It seems like you may be confused about kwargs. If you do foo(bar=1), bar is not a variable. So if you had bar = 1, it's perfectly legal (and encouraged) to do foo(bar=bar). Although, consider if it really is unclear what the param means. Perhaps a positional arg works just fine. If that's not the case, we basically exclusively use _ for private instance properties (like self._start)
Your test_compare_basic isn't actually a test case. Methods starting with test_ should exercise a specific test case or group of test cases. test_compare_basic is actually a generic way of testing any range. Writing it was a fantastic idea, because it makes writing the later tests much more succinct and clear. However, naming it test_ means that it is run by the test harness (and it shouldn't be run alone). I usually call these functions assert* to match the unittest framework (camelCase unfortunately, as this is what unittest does, but you could break this if you wanted). Eg. name it assertFloatRangeCorrect. Then your tests look like:

 def test_simple_float_ranges(self):
 # These reads much more like sentences now...
 self.assertFloatRangeCorrect(0.5, 5.0, 0.5)
 self.assertFloatRangeCorrect(1, 2, 0.25)

I see you have try/except in your tests to print a message. You shouldn't be doing this. For one, the message won't be grouped with the error (or it's stack trace). You can just pass the extra optional msg argument to assertEqual: self.assertEqual(len(actual), len(expected), f'len({actual}) != len({expected})') (notice my use of f-strings, they're definitely cleaner here). By doing this, your tests become a lot shorter and you avoid the try/except/raise dance.
For testing exact equality, instead of ziping two iterables just use self.assertEqual(iterable_a, iterable_b). This will also produce a nice error message automatically.
Your check against Real is strange (more on this later)
What is going on with the _precision, _start, and _step? You shouldn't have those.
Don't use *args like this in __init__. Use arg defaults. Eg. def __init__(self, start=0, stop=1, step=1) (I know this doesn't work perfectly with your current argument scheme, but later I'll argue you should change it)
In tuple unpacking ((self.stop, ) = args) you don't need the parens'
Your __iter__ docstring should be a comment. It doesn't explain to a user of FloatRange how to use the class. But you can eliminate it, because that's obvious from the fact you return self.
Having the range be it's own iterator is strange (and uncommon). And I'll argue against it later.
Minor nit but in __str__ use repr(self). We usually don't call dunder methods (with the prominent exception being super().__init__(...)).
In __repr__ use f-strings. They much easier to construct and they give a better idea of the output format.
You should put FloatRange in float_range.py instead of FloatRange.py
Comparing floats with 0 is usually not what you want. Rarely will the result of arithmetic be exactly 0. You want math.isclose