I've written this class to wrap a collection of bytes and interpret them as 16-bit floats. It's supposed to work like `memoryview(buf).cast('f')` or `array.array('f', buf)`.
I'm trying to avoid converting back and forth between values as much as possible. CPython does not currently support using the format code `'e'` as the format argument to `array.array`.
The motivation is to support 16bit floating point arrays from this RFC: https://www.rfc-editor.org/rfc/rfc8746.html#name-types-of-numbers as part of this decoder: https://github.com/agronholm/cbor2
Is there anything else I can add or take away?
import struct
from collections.abc import Sequence
class Float16Array(Sequence):
    """
    Takes a bytes or bytearray object and interprets it as an array of
    16-bit IEEE half-floats

    Behaves a bit like if you could create an array.array('e', [1, 2, 3.7])

    The bytes are not converted up front: they are held in ``hbuf``, a
    memoryview of unsigned 16-bit integers, and an element is only turned
    into a Python float when it is read. Searches and comparisons go the
    other way and convert the probe value to its 16-bit pattern, so two
    values match when their half-float bit patterns are identical.
    """

    # Defining __eq__ already makes instances unhashable; state it explicitly.
    __hash__ = None

    def __init__(self, buf):
        """Wrap *buf*, a bytes-like object whose length is a multiple of 2."""
        self.hbuf = memoryview(buf).cast('H')

    @staticmethod
    def _to_h(v):
        """convert float to an unsigned 16 bit integer representation"""
        # Tuple-unpacking doubles as a check that exactly one item came back.
        bits, = struct.unpack('H', struct.pack('e', v))
        return bits

    @staticmethod
    def _to_v(h):
        """convert 16-bit integer back to regular float"""
        value, = struct.unpack('e', struct.pack('H', h))
        return value

    def __len__(self):
        """Return the number of half-floats in the array."""
        return len(self.hbuf)

    def __eq__(self, other):
        """
        Compare element-wise against another Float16Array or any Sequence.

        Returns NotImplemented for anything that is not a Sequence, so that
        Python can try the reflected comparison before settling on False.
        """
        if isinstance(other, self.__class__):
            return self.hbuf == other.hbuf
        if not isinstance(other, Sequence):
            return NotImplemented
        if len(self) != len(other):
            return False
        try:
            return all(
                bits == self._to_h(item)
                for bits, item in zip(self.hbuf, other)
            )
        except (struct.error, OverflowError):
            # An item that is not a number, or does not fit in a half-float,
            # cannot be equal to any element.
            return False

    def __getitem__(self, key):
        """Return one element as a float, or a new Float16Array for a slice."""
        if isinstance(key, slice):
            view = self.hbuf[key]
            if view.c_contiguous:
                # Zero-copy: the new array shares memory with this one.
                return self.__class__(view.cast('B'))
            # memoryview.cast() only accepts contiguous views, so slices
            # with a step (including reversed ones) have to be copied.
            return self.__class__(view.tobytes())
        return self._to_v(self.hbuf[key])

    def __contains__(self, value):
        """Return True if *value*, rounded to half precision, is stored."""
        try:
            return self._to_h(value) in self.hbuf
        except (struct.error, OverflowError):
            return False

    def __reversed__(self):
        """Yield the elements as floats, last to first."""
        for item in reversed(self.hbuf):
            yield self._to_v(item)

    def index(self, value, start=0, stop=None):
        """
        Return the index of the first occurrence of *value*.

        *start* and *stop* limit the search like slice bounds, but the
        returned index is always relative to the whole array.

        Raises TypeError if *value* is not a number and ValueError if it is
        not found.
        """
        not_found = ValueError('{!r} is not in array'.format(value))
        try:
            bits = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            # Too large for a half-float, so it cannot be present.
            raise not_found from None
        first, last, _ = slice(start, stop).indices(len(self.hbuf))
        for position in range(first, last):
            if self.hbuf[position] == bits:
                return position
        raise not_found

    def count(self, value):
        """
        Return how many elements equal *value* rounded to half precision.

        Raises TypeError if *value* is not a number.
        """
        try:
            bits = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            return 0
        return sum(1 for item in self.hbuf if item == bits)

    def __repr__(self):
        """Return the class name followed by the elements to 2 decimals."""
        contents = ', '.join('{:.2f}'.format(v).rstrip('0') for v in self)
        return self.__class__.__name__ + '(' + contents + ')'
if __name__ == '__main__':
    # Smoke test, run only when the file is executed as a script.
    packed = struct.pack('eeee', 0.1, 0.1, 72.0, 3.141)
    my_array = Float16Array(packed)

    # Membership and counting work on the half-precision bit patterns.
    assert 0.1 in my_array
    assert my_array.count(72) == 1
    assert my_array.count(0.1)

    # Equality against a plain list of floats.
    assert my_array == [0.1, 0.1, 72.0, 3.141]

    print(list(reversed(my_array)))
    print(my_array)

    # Slicing yields another Float16Array.
    all_but_last = Float16Array(struct.pack('eee', 0.1, 0.1, 72.0))
    assert my_array[0:-1] == all_but_last
-
1\$\begingroup\$ Why are you packing like this? Embedded application, network application, etc. \$\endgroup\$Reinderien– Reinderien2021年06月03日 17:15:09 +00:00Commented Jun 3, 2021 at 17:15
-
1\$\begingroup\$ @Reinderien This is for packed arrays from this specification rfc-editor.org/rfc/rfc8746.html#name-types-of-numbers and is intended to be an example of an extended data type for this decoder: github.com/agronholm/cbor2 I will add this to the question. \$\endgroup\$Sekenre– Sekenre2021年06月04日 09:46:12 +00:00Commented Jun 4, 2021 at 9:46
2 Answers 2
- Add PEP 484 type hints
- In your `_to_h` and `_to_v`, consider tuple-unpacking the return value from `struct` to get a free assertion that there is only one item
- `NotImplemented` is not a very friendly way to handle comparison of disparate types. I would far sooner expect `return False`.
- Move your testing code into a function to avoid namespace pollution
- Single-quotes are not standard for docstrings; use triple quotes instead
- Your `__repr__` was broken and used `__name` where it needed `__name__`
- Have you considered replacing most of this with a NumPy half-precision array created via `frombuffer`?
Suggested
import struct
from collections.abc import Sequence
from typing import Union, Any, Iterable, Optional
class Float16Array(Sequence):
    """
    Takes a bytes or bytearray object and interprets it as an array of
    16-bit IEEE half-floats

    Behaves a bit like if you could create an array.array('e', [1, 2, 3.7])

    The bytes are not converted up front: they are held in ``hbuf``, a
    memoryview of unsigned 16-bit integers, and an element is only turned
    into a Python float when it is read. Searches and comparisons go the
    other way and convert the probe value to its 16-bit pattern, so two
    values match when their half-float bit patterns are identical.
    """

    # Defining __eq__ already makes instances unhashable; state it explicitly.
    __hash__ = None

    def __init__(self, buf: Union[bytes, bytearray, memoryview]):
        """Wrap *buf*, a bytes-like object whose length is a multiple of 2."""
        self.hbuf = memoryview(buf).cast('H')

    @staticmethod
    def _to_h(v: float) -> int:
        """convert float to an unsigned 16 bit integer representation"""
        # Tuple-unpacking doubles as a check that exactly one item came back.
        i, = struct.unpack('H', struct.pack('e', v))
        return i

    @staticmethod
    def _to_v(h: int) -> float:
        """convert 16-bit integer back to regular float"""
        f, = struct.unpack('e', struct.pack('H', h))
        return f

    def __len__(self) -> int:
        """Return the number of half-floats in the array."""
        return len(self.hbuf)

    def __eq__(self, other: Any) -> bool:
        """
        Compare element-wise against another Float16Array or any Sequence.

        Returns NotImplemented for anything that is not a Sequence; the
        ``==`` operator still evaluates to False in that case, but the other
        operand gets a chance to handle the comparison first.
        """
        if isinstance(other, self.__class__):
            return self.hbuf == other.hbuf
        if not isinstance(other, Sequence):
            return NotImplemented
        if len(self) != len(other):
            return False
        try:
            return all(
                hval == self._to_h(oval)
                for hval, oval in zip(self.hbuf, other)
            )
        except (struct.error, OverflowError):
            # An item that is not a number, or does not fit in a half-float,
            # cannot be equal to any element.
            return False

    def __getitem__(
        self, key: Union[int, slice]
    ) -> Union[float, 'Float16Array']:
        """Return one element as a float, or a new Float16Array for a slice."""
        if isinstance(key, slice):
            view = self.hbuf[key]
            if view.c_contiguous:
                # Zero-copy: the new array shares memory with this one.
                return self.__class__(view.cast('B'))
            # memoryview.cast() only accepts contiguous views, so slices
            # with a step (including reversed ones) have to be copied.
            return self.__class__(view.tobytes())
        return self._to_v(self.hbuf[key])

    def __contains__(self, value: Any) -> bool:
        """Return True if *value*, rounded to half precision, is stored."""
        try:
            return self._to_h(value) in self.hbuf
        except (struct.error, OverflowError):
            return False

    def __reversed__(self) -> Iterable[float]:
        """Yield the elements as floats, last to first."""
        for item in reversed(self.hbuf):
            yield self._to_v(item)

    def index(
        self, value: float, start: int = 0, stop: Optional[int] = None
    ) -> int:
        """
        Return the index of the first occurrence of *value*.

        *start* and *stop* limit the search like slice bounds, but the
        returned index is always relative to the whole array.

        Raises TypeError if *value* is not a number and ValueError if it is
        not found.
        """
        not_found = ValueError(f'{value!r} is not in array')
        try:
            buf_val = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            # Too large for a half-float, so it cannot be present.
            raise not_found from None
        first, last, _ = slice(start, stop).indices(len(self.hbuf))
        for position in range(first, last):
            if self.hbuf[position] == buf_val:
                return position
        raise not_found

    def count(self, value: Union[float, int]) -> int:
        """
        Return how many elements equal *value* rounded to half precision.

        Raises TypeError if *value* is not a number.
        """
        try:
            buf_val = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            return 0
        return sum(1 for v in self.hbuf if v == buf_val)

    def __repr__(self) -> str:
        """Return the class name followed by the elements to 2 decimals."""
        contents = ', '.join('{:.2f}'.format(v).rstrip('0') for v in self)
        return f'{self.__class__.__name__}({contents})'
def test():
    """Smoke-test Float16Array: membership, count, equality, repr, slicing."""
    samples = [0.1, 0.1, 72.0, 3.141]
    halves = Float16Array(struct.pack('eeee', *samples))

    # Membership and counting work on the half-precision bit patterns.
    assert 0.1 in halves
    assert halves.count(72) == 1
    assert halves.count(0.1)

    # Equality against a plain list of floats.
    assert halves == samples

    print(list(reversed(halves)))
    print(halves)

    # Slicing yields another Float16Array.
    all_but_last = Float16Array(struct.pack('eee', *samples[:-1]))
    assert halves[0:-1] == all_but_last


if __name__ == '__main__':
    test()
Suggested (numpy)
No custom code; the equivalent test is:
def test_new():
    """Run the same smoke test against a plain NumPy float16 array."""
    data = (0.1, 0.1, 72.0, 3.141)
    halves = np.array(data, dtype=np.float16)

    assert 0.1 in halves

    # Counting is an element-wise comparison followed by a sum.
    for needle, expected in ((72, 1), (0.1, 2)):
        assert np.sum(halves == needle) == expected

    # Half precision only approximates the inputs, hence the tolerances.
    assert np.isclose(halves, data, rtol=1e-4, atol=1e-4).all()

    print(halves[::-1])
    print(halves)

    all_but_last = np.array(data[:-1], dtype=np.float16)
    assert np.isclose(halves[:-1], all_but_last).all()
-
\$\begingroup\$ That's great thank you! In most cases I would use Numpy, but I didn't want to add additional dependencies to the library I'm working on (except as optional extras) \$\endgroup\$Sekenre– Sekenre2021年07月19日 17:25:45 +00:00Commented Jul 19, 2021 at 17:25
-
That doesn't sound like very solid rationale. `numpy` is a very common secondary dependency in the pip ecosystem, and in fact it's likely that some of your users would want `numpy` compatibility for other reasons anyway. – Reinderien, Jul 19, 2021 at 17:27
-
\$\begingroup\$ It's not common in the embedded or IoT worlds. There are no off the shelf builds of numpy for arm32 or big-endian architectures, both of which we have been asked to support. I'm planning to make it an optional dependency for the server side since numeric array tags have recently been added to the CBOR specs. In that case numpy is of course the best option. \$\endgroup\$Sekenre– Sekenre2021年07月19日 17:54:40 +00:00Commented Jul 19, 2021 at 17:54
Having studied further, I think the biggest thing I'm missing is a `__hash__` method like this:
def __hash__(self):
    """
    Return a hash of the class name and the raw bytes, cached in ``_hash``.

    Only arrays over a read-only buffer can be hashed, because the contents
    of a writable buffer may change after the hash has been handed out.

    Raises ValueError if the underlying buffer is read-write and no hash
    has been cached.
    """
    # getattr: _hash may never have been set, e.g. when __init__ does not
    # initialise it.
    cached = getattr(self, '_hash', None)
    if cached is not None:
        return cached
    if not self.hbuf.readonly:
        raise ValueError('cannot hash, underlying bytes are read-write')
    self._hash = hash((self.__class__.__name__, self.hbuf.tobytes()))
    return self._hash