An IEEE half-float implementation in python similar to array.array, is there any way I could make this more efficient?

Question 1

I've written this class to wrap a collection of bytes and interpret them as 16-bit floats. It's supposed to work like memoryview(buf).cast('f') or array.array('f', buf). I'm trying to avoid converting back and forth between values as much as possible. CPython does not currently support using the format code 'e' as the format argument to array.array.

The motivation is to support 16-bit floating-point arrays from this RFC: https://www.rfc-editor.org/rfc/rfc8746.html#name-types-of-numbers as part of this decoder: https://github.com/agronholm/cbor2

Is there anything else I can add or take away?

import struct
from collections.abc import Sequence
class Float16Array(Sequence):
    """
    Read-only sequence view over a buffer of 16-bit IEEE half-floats.

    Takes a bytes or bytearray object and interprets it, without copying,
    as an array of half-precision floats in native byte order.
    Behaves a bit like if you could create an array.array('e', [1, 2, 3.7])

    The data is kept as raw unsigned 16-bit patterns in ``hbuf``.  Items are
    only converted to Python floats when they are read; searches and
    comparisons instead round the probe value to half precision once and
    compare bit patterns.
    """

    # Equality is defined over a buffer that may be mutable, so instances
    # are unhashable (defining __eq__ already implies this; make it explicit).
    __hash__ = None

    def __init__(self, buf):
        """
        Wrap *buf* without copying it.

        memoryview.cast raises TypeError when the byte length of *buf* is
        not a multiple of two.
        """
        self.hbuf = memoryview(buf).cast('H')

    @staticmethod
    def _to_h(v):
        """Convert a float to its unsigned 16-bit integer representation.

        Raises struct.error for non-numeric input and OverflowError for
        values too large for half precision.
        """
        # Tuple-unpacking doubles as a check that exactly one item came back.
        (h,) = struct.unpack('H', struct.pack('e', v))
        return h

    @staticmethod
    def _to_v(h):
        """Convert a 16-bit integer representation back to a regular float."""
        (v,) = struct.unpack('e', struct.pack('H', h))
        return v

    def __len__(self):
        """Return the number of half-floats in the array."""
        return len(self.hbuf)

    def __eq__(self, other):
        """
        Compare with another Float16Array or with any sequence of numbers.

        Returns NotImplemented for non-sequences so that Python can try the
        reflected comparison and finally fall back to "not equal".
        """
        if isinstance(other, self.__class__):
            return self.hbuf == other.hbuf
        if not isinstance(other, Sequence):
            return NotImplemented
        if len(self) != len(other):
            return False
        try:
            return all(h == self._to_h(o) for h, o in zip(self.hbuf, other))
        except (struct.error, OverflowError):
            # An element that cannot be encoded as a half-float cannot
            # be equal to anything stored here.
            return False

    def __getitem__(self, key):
        """
        Return the float at an integer index, or a Float16Array for a slice.

        A contiguous slice shares memory with this array; a stepped slice is
        copied, because memoryview.cast only accepts contiguous memory.
        """
        if isinstance(key, slice):
            view = self.hbuf[key]
            if view.c_contiguous:
                return self.__class__(view.cast('B'))
            return self.__class__(view.tobytes())
        return self._to_v(self.hbuf[key])

    def __contains__(self, value):
        """Return True if *value*, rounded to half precision, is stored."""
        try:
            return self._to_h(value) in self.hbuf
        except (struct.error, OverflowError):
            return False

    def __reversed__(self):
        """Yield the stored values as floats, last to first."""
        for item in reversed(self.hbuf):
            yield self._to_v(item)

    def index(self, value, start=0, stop=None):
        """
        Return the absolute index of the first occurrence of *value*.

        *start* and *stop* limit the search like slice bounds do.
        Raises TypeError if *value* is not a number and ValueError if it is
        not present in the searched range.
        """
        try:
            target = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            # Too large for a half-float, hence certainly not stored.
            raise ValueError(f'{value!r} is not in array') from None
        # Walk absolute positions so the result is valid for self[...].
        for pos in range(*slice(start, stop).indices(len(self.hbuf))):
            if self.hbuf[pos] == target:
                return pos
        raise ValueError(f'{value!r} is not in array')

    def count(self, value):
        """
        Return how many stored items equal *value* rounded to half precision.

        Raises TypeError if *value* is not a number.
        """
        try:
            target = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            # Too large for a half-float, hence certainly not stored.
            return 0
        return sum(1 for h in self.hbuf if h == target)

    def __repr__(self):
        """Return the class name and the values, shown with two decimals at most."""
        contents = ', '.join(f'{v:.2f}'.rstrip('0') for v in self)
        return f'{self.__class__.__name__}({contents})'
if __name__ == '__main__':
    # Smoke test, only run when the file is executed as a script.
    my_array = Float16Array(struct.pack('eeee', 0.1, 0.1, 72.0, 3.141))
    assert 0.1 in my_array
    assert my_array.count(72) == 1
    # Pin the exact count; a bare truthiness check would accept any
    # non-zero result.
    assert my_array.count(0.1) == 2
    assert my_array == [0.1, 0.1, 72.0, 3.141]
    print(list(reversed(my_array)))
    print(my_array)
    assert my_array[0:-1] == Float16Array(struct.pack('eee', 0.1, 0.1, 72.0))

Question 2

Why are you packing like this? Embedded application, network application, etc.

Question 3

@Reinderien This is for packed arrays from this specification rfc-editor.org/rfc/rfc8746.html#name-types-of-numbers and is intended to be an example of an extended data type for this decoder: github.com/agronholm/cbor2 I will add this to the question.

Question 4

Add PEP484 type hints
In your _to_h and _to_v, consider tuple-unpacking the return value from struct to get a free assertion that there is only one item
NotImplemented is not a very friendly way to handle comparison of disparate types. I would far sooner expect return False.
Move your testing code into a function to avoid namespace pollution
Single-quotes are not standard for docstrings; use triple quotes instead
Your __repr__ was broken: it used __name where it needed __name__
Have you considered replacing most of this with a Numpy half-precision array created via frombuffer?

Suggested

import struct
from collections.abc import Sequence
from typing import Union, Any, Iterable, Optional
class Float16Array(Sequence):
    """
    Read-only sequence view over a buffer of 16-bit IEEE half-floats.

    Takes a bytes or bytearray object and interprets it, without copying,
    as an array of half-precision floats in native byte order.
    Behaves a bit like if you could create an array.array('e', [1, 2, 3.7])

    The data is kept as raw unsigned 16-bit patterns in ``hbuf``.  Items are
    only converted to Python floats when they are read; searches and
    comparisons instead round the probe value to half precision once and
    compare bit patterns.
    """

    # Equality is defined over a buffer that may be mutable, so instances
    # are unhashable (defining __eq__ already implies this; make it explicit).
    __hash__ = None

    def __init__(self, buf: Union[bytes, bytearray, memoryview]):
        """
        Wrap *buf* without copying it.

        memoryview.cast raises TypeError when the byte length of *buf* is
        not a multiple of two.
        """
        self.hbuf = memoryview(buf).cast('H')

    @staticmethod
    def _to_h(v: float) -> int:
        """Convert a float to its unsigned 16-bit integer representation.

        Raises struct.error for non-numeric input and OverflowError for
        values too large for half precision.
        """
        # Tuple-unpacking doubles as a check that exactly one item came back.
        i, = struct.unpack('H', struct.pack('e', v))
        return i

    @staticmethod
    def _to_v(h: int) -> float:
        """Convert a 16-bit integer representation back to a regular float."""
        f, = struct.unpack('e', struct.pack('H', h))
        return f

    def __len__(self) -> int:
        """Return the number of half-floats in the array."""
        return len(self.hbuf)

    def __eq__(self, other: Any) -> bool:
        """
        Compare with another Float16Array or with any sequence of numbers.

        Objects that are not sequences simply compare unequal.
        """
        if isinstance(other, self.__class__):
            return self.hbuf == other.hbuf
        if not isinstance(other, Sequence):
            return False
        if len(self) != len(other):
            return False
        try:
            return all(h == self._to_h(o) for h, o in zip(self.hbuf, other))
        except (struct.error, OverflowError):
            # An element that cannot be encoded as a half-float cannot
            # be equal to anything stored here.
            return False

    def __getitem__(self, key: Union[int, slice]) -> Union[float, 'Float16Array']:
        """
        Return the float at an integer index, or a Float16Array for a slice.

        A contiguous slice shares memory with this array; a stepped slice is
        copied, because memoryview.cast only accepts contiguous memory.
        """
        if isinstance(key, slice):
            view = self.hbuf[key]
            if view.c_contiguous:
                return self.__class__(view.cast('B'))
            return self.__class__(view.tobytes())
        return self._to_v(self.hbuf[key])

    def __contains__(self, value: Any) -> bool:
        """Return True if *value*, rounded to half precision, is stored."""
        try:
            return self._to_h(value) in self.hbuf
        except (struct.error, OverflowError):
            return False

    def __reversed__(self) -> Iterable[float]:
        """Yield the stored values as floats, last to first."""
        for item in reversed(self.hbuf):
            yield self._to_v(item)

    def index(self, value: float, start: int = 0, stop: Optional[int] = None) -> int:
        """
        Return the absolute index of the first occurrence of *value*.

        *start* and *stop* limit the search like slice bounds do.
        Raises TypeError if *value* is not a number and ValueError if it is
        not present in the searched range.
        """
        try:
            target = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            # Too large for a half-float, hence certainly not stored.
            raise ValueError(f'{value!r} is not in array') from None
        # Walk absolute positions so the result is valid for self[...].
        for pos in range(*slice(start, stop).indices(len(self.hbuf))):
            if self.hbuf[pos] == target:
                return pos
        raise ValueError(f'{value!r} is not in array')

    def count(self, value: Union[float, int]) -> int:
        """
        Return how many stored items equal *value* rounded to half precision.

        Raises TypeError if *value* is not a number.
        """
        try:
            target = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            # Too large for a half-float, hence certainly not stored.
            return 0
        return sum(1 for h in self.hbuf if h == target)

    def __repr__(self) -> str:
        """Return the class name and the values, shown with two decimals at most."""
        contents = ', '.join(f'{v:.2f}'.rstrip('0') for v in self)
        return f'{self.__class__.__name__}({contents})'
def test() -> None:
    """Smoke-test Float16Array: membership, counting, equality, reversal, slicing."""
    my_array = Float16Array(struct.pack('eeee', 0.1, 0.1, 72.0, 3.141))
    assert 0.1 in my_array
    assert my_array.count(72) == 1
    # Pin the exact count; a bare truthiness check would accept any
    # non-zero result.
    assert my_array.count(0.1) == 2
    assert my_array == [0.1, 0.1, 72.0, 3.141]
    print(list(reversed(my_array)))
    print(my_array)
    assert my_array[0:-1] == Float16Array(struct.pack('eee', 0.1, 0.1, 72.0))


if __name__ == '__main__':
    test()

Suggested (numpy)

No custom code; the equivalent test is:

def test_new():
    """NumPy counterpart of the Float16Array smoke test; needs no custom class."""
    # Imported here so the snippet runs on its own and NumPy is only needed
    # when this test is actually called.
    import numpy as np

    data = (0.1, 0.1, 72.0, 3.141)
    my_array = np.array(data, dtype=np.float16)
    assert 0.1 in my_array
    assert np.count_nonzero(my_array == 72) == 1
    assert np.count_nonzero(my_array == 0.1) == 2
    # The float16 values are rounded, so comparing them with the original
    # double-precision inputs needs a tolerance.
    assert np.allclose(my_array, data, rtol=1e-4, atol=1e-4)
    print(my_array[::-1])
    print(my_array)
    # Both sides are float16 built from the same inputs, so they are rounded
    # identically and can be compared exactly.
    assert np.array_equal(
        my_array[:-1],
        np.array(data[:-1], dtype=np.float16),
    )

Question 5

That's great thank you! In most cases I would use Numpy, but I didn't want to add additional dependencies to the library I'm working on (except as optional extras)

Question 6

That doesn't sound like very solid rationale. numpy is a very common secondary dependency in the pip ecosystem, and in fact it's likely that some of your users would want numpy compatibility for other reasons anyway.

Question 7

It's not common in the embedded or IoT worlds. There are no off the shelf builds of numpy for arm32 or big-endian architectures, both of which we have been asked to support. I'm planning to make it an optional dependency for the server side since numeric array tags have recently been added to the CBOR specs. In that case numpy is of course the best option.

Question 8

Having studied further, I think the biggest thing I'm missing is a __hash__ method like this:

def __hash__(self):
    """
    Hash the array by its class name and raw bytes.

    The hash is computed only while the underlying buffer is read-only and
    is then cached on the instance as ``_hash``; a cached value is returned
    on later calls.

    Raises ValueError when nothing is cached and the buffer is writable.
    """
    # NOTE(review): __eq__ also compares equal to plain sequences of numbers
    # (e.g. tuples), whose hashes will not match this one - confirm that is
    # acceptable before using instances as dict keys next to such objects.
    # getattr with a default: no __init__ in this file sets ``_hash``, so a
    # plain attribute read would raise AttributeError on the first call.
    cached = getattr(self, '_hash', None)
    if cached is not None:
        return cached
    if not self.hbuf.readonly:
        raise ValueError('cannot hash, underlying bytes are read-write')
    self._hash = hash((self.__class__.__name__, self.hbuf.tobytes()))
    return self._hash

Reinderien Reinderien 70.9k5 gold badges76 silver badges256 bronze badges · Accepted Answer · 2021-07-06 20:24:10Z

Add PEP484 type hints
In your _to_h and _to_v, consider tuple-unpacking the return value from struct to get a free assertion that there is only one item
NotImplemented is not a very friendly way to handle comparison of disparate types. I would far sooner expect return False.
Move your testing code into a function to avoid namespace pollution
Single-quotes are not standard for docstrings; use triple quotes instead
Your __repr__ was broken: it used __name where it needed __name__
Have you considered replacing most of this with a Numpy half-precision array created via frombuffer?

Suggested

import struct
from collections.abc import Sequence
from typing import Union, Any, Iterable, Optional
class Float16Array(Sequence):
    """
    Read-only sequence view over a buffer of 16-bit IEEE half-floats.

    Takes a bytes or bytearray object and interprets it, without copying,
    as an array of half-precision floats in native byte order.
    Behaves a bit like if you could create an array.array('e', [1, 2, 3.7])

    The data is kept as raw unsigned 16-bit patterns in ``hbuf``.  Items are
    only converted to Python floats when they are read; searches and
    comparisons instead round the probe value to half precision once and
    compare bit patterns.
    """

    # Equality is defined over a buffer that may be mutable, so instances
    # are unhashable (defining __eq__ already implies this; make it explicit).
    __hash__ = None

    def __init__(self, buf: Union[bytes, bytearray, memoryview]):
        """
        Wrap *buf* without copying it.

        memoryview.cast raises TypeError when the byte length of *buf* is
        not a multiple of two.
        """
        self.hbuf = memoryview(buf).cast('H')

    @staticmethod
    def _to_h(v: float) -> int:
        """Convert a float to its unsigned 16-bit integer representation.

        Raises struct.error for non-numeric input and OverflowError for
        values too large for half precision.
        """
        # Tuple-unpacking doubles as a check that exactly one item came back.
        i, = struct.unpack('H', struct.pack('e', v))
        return i

    @staticmethod
    def _to_v(h: int) -> float:
        """Convert a 16-bit integer representation back to a regular float."""
        f, = struct.unpack('e', struct.pack('H', h))
        return f

    def __len__(self) -> int:
        """Return the number of half-floats in the array."""
        return len(self.hbuf)

    def __eq__(self, other: Any) -> bool:
        """
        Compare with another Float16Array or with any sequence of numbers.

        Objects that are not sequences simply compare unequal.
        """
        if isinstance(other, self.__class__):
            return self.hbuf == other.hbuf
        if not isinstance(other, Sequence):
            return False
        if len(self) != len(other):
            return False
        try:
            return all(h == self._to_h(o) for h, o in zip(self.hbuf, other))
        except (struct.error, OverflowError):
            # An element that cannot be encoded as a half-float cannot
            # be equal to anything stored here.
            return False

    def __getitem__(self, key: Union[int, slice]) -> Union[float, 'Float16Array']:
        """
        Return the float at an integer index, or a Float16Array for a slice.

        A contiguous slice shares memory with this array; a stepped slice is
        copied, because memoryview.cast only accepts contiguous memory.
        """
        if isinstance(key, slice):
            view = self.hbuf[key]
            if view.c_contiguous:
                return self.__class__(view.cast('B'))
            return self.__class__(view.tobytes())
        return self._to_v(self.hbuf[key])

    def __contains__(self, value: Any) -> bool:
        """Return True if *value*, rounded to half precision, is stored."""
        try:
            return self._to_h(value) in self.hbuf
        except (struct.error, OverflowError):
            return False

    def __reversed__(self) -> Iterable[float]:
        """Yield the stored values as floats, last to first."""
        for item in reversed(self.hbuf):
            yield self._to_v(item)

    def index(self, value: float, start: int = 0, stop: Optional[int] = None) -> int:
        """
        Return the absolute index of the first occurrence of *value*.

        *start* and *stop* limit the search like slice bounds do.
        Raises TypeError if *value* is not a number and ValueError if it is
        not present in the searched range.
        """
        try:
            target = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            # Too large for a half-float, hence certainly not stored.
            raise ValueError(f'{value!r} is not in array') from None
        # Walk absolute positions so the result is valid for self[...].
        for pos in range(*slice(start, stop).indices(len(self.hbuf))):
            if self.hbuf[pos] == target:
                return pos
        raise ValueError(f'{value!r} is not in array')

    def count(self, value: Union[float, int]) -> int:
        """
        Return how many stored items equal *value* rounded to half precision.

        Raises TypeError if *value* is not a number.
        """
        try:
            target = self._to_h(value)
        except struct.error:
            raise TypeError('value must be float or int') from None
        except OverflowError:
            # Too large for a half-float, hence certainly not stored.
            return 0
        return sum(1 for h in self.hbuf if h == target)

    def __repr__(self) -> str:
        """Return the class name and the values, shown with two decimals at most."""
        contents = ', '.join(f'{v:.2f}'.rstrip('0') for v in self)
        return f'{self.__class__.__name__}({contents})'
def test() -> None:
    """Smoke-test Float16Array: membership, counting, equality, reversal, slicing."""
    my_array = Float16Array(struct.pack('eeee', 0.1, 0.1, 72.0, 3.141))
    assert 0.1 in my_array
    assert my_array.count(72) == 1
    # Pin the exact count; a bare truthiness check would accept any
    # non-zero result.
    assert my_array.count(0.1) == 2
    assert my_array == [0.1, 0.1, 72.0, 3.141]
    print(list(reversed(my_array)))
    print(my_array)
    assert my_array[0:-1] == Float16Array(struct.pack('eee', 0.1, 0.1, 72.0))


if __name__ == '__main__':
    test()

Suggested (numpy)

No custom code; the equivalent test is:

def test_new():
    """NumPy counterpart of the Float16Array smoke test; needs no custom class."""
    # Imported here so the snippet runs on its own and NumPy is only needed
    # when this test is actually called.
    import numpy as np

    data = (0.1, 0.1, 72.0, 3.141)
    my_array = np.array(data, dtype=np.float16)
    assert 0.1 in my_array
    assert np.count_nonzero(my_array == 72) == 1
    assert np.count_nonzero(my_array == 0.1) == 2
    # The float16 values are rounded, so comparing them with the original
    # double-precision inputs needs a tolerance.
    assert np.allclose(my_array, data, rtol=1e-4, atol=1e-4)
    print(my_array[::-1])
    print(my_array)
    # Both sides are float16 built from the same inputs, so they are rounded
    # identically and can be compared exactly.
    assert np.array_equal(
        my_array[:-1],
        np.array(data[:-1], dtype=np.float16),
    )

That's great thank you! In most cases I would use Numpy, but I didn't want to add additional dependencies to the library I'm working on (except as optional extras)
That doesn't sound like very solid rationale. numpy is a very common secondary dependency in the pip ecosystem, and in fact it's likely that some of your users would want numpy compatibility for other reasons anyway.
It's not common in the embedded or IoT worlds. There are no off the shelf builds of numpy for arm32 or big-endian architectures, both of which we have been asked to support. I'm planning to make it an optional dependency for the server side since numeric array tags have recently been added to the CBOR specs. In that case numpy is of course the best option.

Stack Exchange Network

An IEEE half-float implementation in python similar to array.array, is there any way I could make this more efficient?

2 Answers 2

Suggested

Suggested (numpy)

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

An IEEE half-float implementation in python similar to array.array, is there any way I could make this more efficient?

2 Answers 2

Suggested

Suggested (numpy)

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions