I recently wrote a full implementation of a range()
operator with floating point values in Python, and I'm not sure if I've followed the standard pythonic patterns. From what I can tell, it works. I've written a few test cases, but they don't cover everything yet. I've also published this in a gist on github as well
This was written in Python 3.6.4, though I think it should work in most 3.x flavors.
Here's FloatRange.py
# -*- coding: utf-8 -*-
from numbers import Real
class FloatRange:
"""
step/precision used as the tolerance in __contains__ method. Defaults to 1/(10**10).
This is used to attempt to mitigate some of the floating point rounding error, but scales with the step size
so that smaller step sizes are not susceptible to catching more than they ought.
Basically, there's a 2 in <precision> chance that a number will randomly
"""
_precision = 10**10
_start, _step = 0, 1 # stop must be defined by user. No use putting it in here.
"""
Here we set up our constraints as the user passes their values:
- You can pass the values as follows:
- stop (start defaults to 0, step defaults to 1)
- start, stop (step again defaults to 1)
- start, stop, step
- start, step, and stop must be Real numbers
- The step must be non-zero
- The step must be in the same direction as the difference between stop and start
"""
def __init__(self, *args, **kwargs):
self.start = self._start
self.step = self._step
self.precision = self._precision
self.counter = 0
if len(args) == 1:
(self.stop, ) = args # FloatRange
elif len(args) == 2:
(self.start, self.stop) = args
elif len(args) == 3:
(self.start, self.stop, self.step) = args
else:
raise TypeError("FloatRange accepts 1, 2, or 3 arguments. ({0} given)".format(len(args)))
for num in self.start, self.step, self.stop:
if not isinstance(num, Real):
raise TypeError("FloatRange only accepts Real number arguments. ({0} : {1} given)".format(type(num), str(num)))
if self.step == 0:
raise ValueError("FloatRange step cannot be 0")
if (self.stop-self.start)/self.step < 0:
raise ValueError("FloatRange value must be in the same direction as the start->stop")
self.set_precision(self._precision) # x in FloatRange will return True for values within 0.1% of step size
if len(kwargs) > 0:
for key, value in kwargs.items():
if key == "precision":
if value < 0:
raise ValueError("FloatRange precision must be positive")
self.tolerance = self.step/self.precision
else:
raise ValueError("Unknown kwargs key: {0}".format(key))
"""
Returns the next value in the iterator, or it stops the iteration and resets.
"""
def __next__(self):
output = self.start + (self.counter * self.step)
if ((self.step > 0 and output >= self.stop) or
(self.step < 0 and output <= self.stop)) :
self.counter = 0
raise StopIteration
self.counter += 1
return output
"""
The class already implements __next__(), so it is its own iterable
"""
def __iter__(self):
return self
def set_precision(self, precision=None):
if precision is None:
self.precision = self._precision
elif isinstance(precision, Real):
if precision < 0:
raise ValueError("FloatRange precision cannot be a negative number.")
self.precision = precision
else:
raise ValueError("FloatRange precision must be a Real number.")
self.tolerance = abs(self.step/self.precision)
"""
len(my_FloatRange)
"""
def __len__(self):
# we have to do this minute addition here so that floating point rounding does not fail.
return int((self.stop - self.start + self.step/10**13) / self.step)
"""
x in my_FloatRange
Evaluates whether a given number is contained in the range, in constant time.
Non-exact values will return True if they are within the provided tolerance.
Use set_precision(precision) to define the precision:step ratio (the tolerance)
"""
def __contains__(self, item):
diff = (item - self.start) % self.step
# if we're dealing with exact cases (not recommended, but okay.)
if (self.step > 0 and
item >= self.start-self.tolerance and
item < self.stop):
return (min(diff, self.step-diff) < self.tolerance)
elif (self.step < 0 and
item <= self.start+self.tolerance and
item > self.stop
):
return (min(abs(diff), abs(self.step-diff)) < self.tolerance)
return False
def __str__(self):
return self.__repr__()
def __repr__(self):
ext = ""
if not self.step == 1:
ext += ", {0}".format(self.step)
if self.precision != self._precision:
ext += ", precision={0}, tolerance={1}".format(
self.precision, self.tolerance
)
return "FloatRange({0}, {1}{2})".format(
self.start,
self.stop,
ext
)
Here are my current preliminary test casestest_FloatRange.py
# -*- coding: utf-8 -*-
import unittest
from FloatRange import FloatRange
class TestCase_FloatRange(unittest.TestCase):
def test_compare_basic(self, start=None, stop=1, step=None, verbose=False):
my_range = None
my_FloatRange = None
if step is None:
if start is None:
my_range = range(stop)
my_FloatRange = FloatRange(stop)
else:
my_range = range(start, stop)
my_FloatRange = FloatRange(start, stop)
else:
my_range = range(start, stop, step)
my_FloatRange = FloatRange(start, stop, step)
if verbose:
print("Validating:[{0}] == [{1}]".format(
my_range, my_FloatRange))
for x,y in zip(my_range, my_FloatRange):
try:
self.assertEqual(x,y)
except:
print("{0} and {1} failed to produce the same values.".format(
my_range, my_FloatRange
))
raise
def test_compare_range_functionality(self):
_length = 10 # arbitrary number for adequate length
_step = 2
_start = 5
self.test_compare_basic(stop = _length)
self.test_compare_basic(start =_start,
stop = _length)
self.test_compare_basic(start=_start,
stop= _start+_length)
self.test_compare_basic(start=_start,
stop= _start+_length*_step,
step= _step)
def test_correct_length(self):
for _divisor in range(1, 100):
for _step_base in range(1, 100):
for _length in range(1, 100):
_step = _step_base / _divisor
_start = 1 / _divisor + 1
_stop = _start + _length*_step
my_FloatRange = FloatRange(_start,
_stop,
_step)
try:
self.assertEqual(len(my_FloatRange), _length)
except Exception:
print("Length test failed with parameters:\n\tstart:{0}\n\tstop :{1}\n\tstep: {2}\n\tvalue: {2}".format(
_start, _stop, _step, len(my_FloatRange)
))
raise
def test_value_set(self, subject=FloatRange(1), values=[0], verbose=False):
if verbose:
print("Validating {0} produces {1}".format(subject, values))
try:
self.assertEqual(len(subject), len(values))
except:
print("{0} and {1} do not have the same length!".format(subject, values))
raise
for f, v in zip(subject, values):
try:
self.assertAlmostEqual(f, v) # floating point rounding doesn't allow for exact equality.
except:
print("{0} does not produce {1}".format(subject, values))
raise
def test_values(self):
self.test_value_set(FloatRange(0, 10, 1/3), [(x/3) for x in range(30)])
self.test_value_set(FloatRange(5, 15, 1/3), [(5+(x/3)) for x in range(30)])
self.test_value_set(FloatRange(1, 11, 1/7), [(1+(x/7)) for x in range(70)])
self.test_value_set(FloatRange(8, 18, 1/7), [(8+(x/7)) for x in range(70)])
if __name__ == '__main__':
unittest.main()
My initial test cases pass, and it seems to work alright. Is there anything else that I should be doing?
2 Answers 2
Let's start with comments about the code you've submitted, before we discuss some more important overlying concepts and design decisions.
Good
- Docstrings for each method (some could be more helpful though, like
__len__
) - Some comments for perhaps unclear lines
- Python 3 style classes (didn't inherit from
object
) - You have unit tests!
- Good use of
ValueError
Improvements
- You don't need
# -*- coding: utf-8 -*-
with Python 3 - You formatting is pretty inconsistent. Try PEP8 (it's the standard formatting that most projects adhere to)
- You seem to prefix a lot of variables with
_
. It seems like you may be confused about kwargs. If you dofoo(bar=1)
,bar
is not a variable. So if you hadbar = 1
, it's perfectly legal (and encouraged) to dofoo(bar=bar)
. Although, consider if it really is unclear what the param means. Perhaps a positional arg works just fine. If that's not the case, we basically exclusively use_
for private instance properties (likeself._start
) - Your
test_compare_basic
isn't actually a test case. Methods starting withtest_
should exercise a specific test case or group of test cases.test_compare_basic
is actually a generic way of testing any range. Writing it was a fantastic idea, because it makes writing the later tests much more succinct and clear. However, naming ittest_
means that it is run by the test harness (and it shouldn't be run alone). I usually call these functionsassert*
to match the unittest framework (camelCase unfortunately, as this is whatunittest
does, but you could break this if you wanted). Eg. name itassertFloatRangeCorrect
. Then your tests look like:
def test_simple_float_ranges(self):
# These reads much more like sentences now...
self.assertFloatRangeCorrect(0.5, 5.0, 0.5)
self.assertFloatRangeCorrect(1, 2, 0.25)
- I see you have
try
/except
in your tests to print a message. You shouldn't be doing this. For one, the message won't be grouped with the error (or it's stack trace). You can just pass the extra optionalmsg
argument toassertEqual
:self.assertEqual(len(actual), len(expected), f'len({actual}) != len({expected})')
(notice my use of f-strings, they're definitely cleaner here). By doing this, your tests become a lot shorter and you avoid thetry
/except
/raise
dance. - For testing exact equality, instead of
zip
ing two iterables just useself.assertEqual(iterable_a, iterable_b)
. This will also produce a nice error message automatically. - Your check against
Real
is strange (more on this later) - What is going on with the
_precision
,_start
, and_step
? You shouldn't have those. - Don't use
*args
like this in__init__
. Use arg defaults. Eg.def __init__(self, start=0, stop=1, step=1)
(I know this doesn't work perfectly with your current argument scheme, but later I'll argue you should change it) - In tuple unpacking (
(self.stop, ) = args
) you don't need the parens' - Your
__iter__
docstring should be a comment. It doesn't explain to a user ofFloatRange
how to use the class. But you can eliminate it, because that's obvious from the fact youreturn self
. - Having the range be it's own iterator is strange (and uncommon). And I'll argue against it later.
- Minor nit but in
__str__
userepr(self)
. We usually don't call dunder methods (with the prominent exception beingsuper().__init__(...)
). - In
__repr__
use f-strings. They much easier to construct and they give a better idea of the output format. - You should put
FloatRange
infloat_range.py
instead ofFloatRange.py
- Comparing floats with
0
is usually not what you want. Rarely will the result of arithmetic be exactly 0. You wantmath.isclose
Now, let's talk about the big concept here. Python's builtin range doesn't support float
s as you likely know. There is good reason for this. Floating point math does not always work as pen and paper decimal math due to representation issues. A similar problem would be adding 1/3 as a decimal by hand 3 times. You expect 1, but since you only have a finite number of decimals, it won't be exactly 1 (it'll be 0.99...
).
What does this have to do with your float range? It poses two interesting problems for the user of FloatRange
if they're used to range()
.
The upper bound may not produce the range that you expect due to representation errors alluded to above. Where we can know that range(5)
will always have 5 numbers, we can't really be so sure about the length of range(0, 10, 0.1)
(that is, unless the start, stop, and step are exactly 0, 10, and 0,1--floats are deterministic given the same operations in the same order) because of floating point inaccuracies. Sure, we can divide like you did. However, with your precision factor, I suspect that length won't always be right. The trouble here is we need to decide what stop
means. For range
, it's much easier to say because integers are exact. range
can be thought of as filling in the number line between start
and stop
(excluding stop
). We probably want to exclude stop
too for consistency, but FloatRange
is more of a finite set of points between start
and stop
exclusive. Because of this, membership is a little more tricky. You could define membership as being within the range or being an explicit member from the iteration of the range. For range()
, these two are equivalent because integers are countable.
You seem to have chosen the later definition of __contains__
. But, it does beg the question: is this actually meaningful? Is there a context where you'd need to check floating point within a tolerance of some number of (finite-representable) discrete points in some range.
As a result of these issues, this FloatRange
is way more complicated than it needs to be. You also make some common mistakes with comparing floating point numbers that will fail for extrema.
As an aside, let's also take a look at your constructor parameters. You allow for 1, 2, or 3 arguments (excluding precision), like range
. I think the only really meaningful constructors are the 2 and 3 argument ones. The single argument assumes a start of 0
and step of 1
. But, then this is precisely just range
(so why not use range
?). It seems like it's really only meaningful to define a floating point stop and step (with a start of 0) or all 3. But, if you really feel strongly about the 1 argument case, you can of course keep it.
Now that we've discussed the problems, let's take a stab at some solutions. I see two solutions.
You want a range starting at start
that adds step
until the number is >= stop
. This is more like what you've implemented (and similar to range
in some regards, except its length is not constant-time computable). I'd recommend not defining __len__
. If you do, you may want to warn that it is not constant time. Why is this? Well you could do (stop - start) / step
, but as you likely found, this has accuracy issues. These are the same representation issues we mentioned above. Furthermore, it is difficult to account for fancier bounds checking (ie. if you want to keep producing numbers until one is less than or "close to" stop
for some definition of close to--like within some threshold
).
from itertools import count, takewhile
class FloatRange:
def __init__(self, start, stop=None, step=1):
# No to handle # of arguments manually
if stop is None:
stop = start
start = 0
if any(not isinstance(x, float) for x in (start, stop, step)):
raise ValueError('start, stop, step must be floats')
if (start < stop and step < 0) or (start > stop and step > 0):
raise ValueError('step sign must match (stop - start)')
self.start = start
self.stop = stop
self.step = step
def __iter__(self):
return takewhile(lambda x: x < self.stop, (self.start + i * self.step
for i in count(0)))
Note we need no custom iterator or lots of logic. itertools
can do most of the heavy lifting. Furthermore, we can update the predicate lambda x:
to also include some definition of less than or close to like so: lambda x: x < self.stop and not math.isclose(x, self.stop, ...)
. Look at math.isclose
to see what you need to pass (you need two params, not just tolerance). If you really need __len__
:
def __len__(self):
count = 0
for x in self:
count += 1
return count
I'd recommend against __contains__
here because determining the index count have precision issues for extrema. Eg. self.step * round((x - self.start) / self.step)
could be unstable.
You want a range that takes some pre-determinted number of steps
of size step
from start
. Notice there is no stop here. __len__
is immediately obvious. I'd recommend maybe not defining __contains__
for now.
This case is very straightfoward:
class FloatRange:
def __init__(self, start, *, step=1, steps=0): # here I require step and steps to be kwargs for clarity
if any(not isinstance(x, float) for x in (start, step)):
raise ValueError('start and step must be floats')
if not isinstance(steps, int) or x < 0:
raise ValueError('steps must be a positive integer')
self.start = start
self.step = step
self.steps = steps
def __iter__(self):
return (self[i] for i in range(self.steps))
def __getitem__(self, i):
if not 0 <= i < self.steps:
raise IndexError('FloatRange index out of range')
return self.start + i * self.step
def __len__(self):
return self.steps
Here we can easily define __len__
. __contains__
is still tricky, because determining the index of a potential member of the range could be unstable. Here, though, because we can compute the end of the range in constant time (it's exactly start + steps * step
), we can do some sort of clever binary search. More specifically, we can search for numbers close to the desired numbers (for some metric of closeness that you determine) and stop once the numbers we find are less than the desired number and decreasing (negative step) OR greater than the desired number and increasing (positive step). This comes nearly for free because we were able to define __getitem__
(which we couldn't before because we couldn't bound the indices). We note that in this way, this FloatRange
behaves much more like range()
even though the constructor parameters are different.
You may argue that since steps
must be an integer, if you placed some sane limits on it then it would be impossible to construct a member whose index calculation is unstable. Unfortunately, because the index calculation involves a multiply/divide this is just not the case. By reading the IEEE 754 spec you can construct a degenerate case. Specifically, for large indices (which would initially be a float when resulting from the index computation) the floating point resolution is so wide that converting to an int
does not produce the correct index. This is especially true for Python because int
is arbitrary precision.
-
\$\begingroup\$ Wow, that's quite detailed. Thank you for your time... I'm gonna have to read through this a few times to pick it apart. You mentioned that my comparison against
Real
was strange... could you expand on that? Obviously, I thought it was a reasonable comparison. \$\endgroup\$David Culbreth– David Culbreth2019年01月22日 14:10:19 +00:00Commented Jan 22, 2019 at 14:10 -
1\$\begingroup\$ You may want to test
isinstance(x, (float, int))
to allow calls likeFloatRange(3, 9, .1)
without error. \$\endgroup\$301_Moved_Permanently– 301_Moved_Permanently2019年01月22日 14:53:09 +00:00Commented Jan 22, 2019 at 14:53 -
\$\begingroup\$ Also, as regard to your discussion on the number of arguments, I find the use-case of a single argument perfectly valid and not necessarily replaceable by
range
. Think ofFloatRange(9.3)
for instance. \$\endgroup\$301_Moved_Permanently– 301_Moved_Permanently2019年01月23日 08:41:55 +00:00Commented Jan 23, 2019 at 8:41 -
\$\begingroup\$ @DavidCulbreth For one,
isinstance
is used pretty sparingly. Although, it's definitely appropriate in this context. Checking againstReal
is appropriate depending on the kind of numbers you'd want to accept. Perhaps in this context it's appropriate, but that depends. @MathiasEttinger has a good point that you should allow bothfloat
s andint
s. \$\endgroup\$Bailey Parker– Bailey Parker2019年01月24日 00:29:21 +00:00Commented Jan 24, 2019 at 0:29 -
1\$\begingroup\$ @MathiasEttinger Yes, I had considered that case. I see your point, but that's equivalent to
map(float, range(ceil(x))
or preferablyrange(ceil(x))
. The latter gives you more operations (sincerange
is more featured with better performance). Although, in my sample implementations I did support this 1-arg constructor. \$\endgroup\$Bailey Parker– Bailey Parker2019年01月24日 00:31:04 +00:00Commented Jan 24, 2019 at 0:31
You probably should replace **kwargs
in __init__
with precision=10**10
:
def __init__(self, *args, precision=10**10):
self.precision = precision
self.set_precision(precision) # not self._precision
and remove last huge block of __init__
that validates kwargs
The way you implement __iter__
has following disadvantage:
r = FloatRange(1., 2., 0.3)
iterator1 = iter(r)
iterator2 = iter(r)
assert next(iterator1) == 1.0
assert next(iterator2) == 1.3
iterator3 = iter(r)
assert next(iterator3) == 1.6
If you run the same code with built-in python range
iterator2 and iterator3 will produce original sequence, not empty. You probably should remove __next__
method and return generator in __iter__
:
def __iter__(self):
output = self.start
while (self.step > 0) == (output < self.stop) and output != self.stop:
yield output
output += self.step
-
\$\begingroup\$ What version of Python are you using? when I ran your code, it failed on
assert list(iterator2) == [] --- AssertionError
That is where it is supposed to fail, yes? \$\endgroup\$David Culbreth– David Culbreth2019年01月22日 13:57:38 +00:00Commented Jan 22, 2019 at 13:57 -
\$\begingroup\$ It's my mistake. I wanted to point out that consuming one iterators have side effect on other iterator because all iterators are the same object. The assertion error happen because consuming iterator1 resets its state so it is possible to iterate with it again. Updated the answer \$\endgroup\$belkka– belkka2019年01月27日 17:45:59 +00:00Commented Jan 27, 2019 at 17:45
_start
and_step
names instead of juststart
andstep
for class attributes? \$\endgroup\$list(range(0, 10, -1))
is the empty list, not an error. \$\endgroup\$