There are frozenset
in Python, but no frozendict
or frozenmap
. I tried to implement it (Don't care too much about class naming, because I want to try to imitate the standard library) with frozenset
, which implement O(1) time to find elements, and only calculates hash values according to all keys. The effect is almost perfect:
from typing import Any, NamedTuple
from collections.abc import Hashable, Mapping, Iterable, Iterator
from operator import itemgetter
from itertools import starmap, repeat, chain
_value = None
class kv_pair(NamedTuple):
key: Hashable
value: Any
def __eq__(self, other):
if isinstance(other, tuple):
return self.key == other[0]
elif isinstance(other, Hashable):
global _value
_value = self.value
return self.key == other
else:
return NotImplemented
def __hash__(self):
return hash(self.key)
def __str__(self):
return tuple.__repr__(self)
def __repr__(self):
return tuple.__repr__(self)
class frozenmap(Mapping):
__slots__ = '_frozen'
def __init__(self, iterable: Iterable = ()):
self._frozen = frozenset(starmap(kv_pair, iterable))
def keys(self) -> Iterable[Hashable]:
return frozenmap_keys(self._frozen)
def values(self) -> Iterable:
return frozenmap_values(self._frozen)
def items(self) -> Iterable[tuple[Hashable, Any]]:
return frozenmap_items(self._frozen)
def to_dict(self) -> dict:
return dict(self._frozen)
def get(self, key: Hashable, default=None) -> Any:
if key in self._frozen:
return _value
else:
return default
def copy(self) -> 'frozenmap':
return self.__class__(self._frozen)
@classmethod
def fromkeys(cls, keys, val=None) -> 'frozenmap':
return cls(zip(keys, repeat(val)))
@classmethod
def from_dict(cls, dct: dict) -> 'frozenmap':
return cls(dct.items())
def __copy__(self) -> 'frozenmap':
return self.__class__(self._frozen)
def __bool__(self) -> bool:
return bool(self._frozen)
def __iter__(self) -> Iterator[tuple[Hashable, Any]]:
return map(itemgetter(0), self._frozen)
def __len__(self) -> int:
return len(self._frozen)
def __str__(self) -> str:
return self.__class__.__name__ + '({' + ', '.join(f'{repr(k)}: {repr(v)}'for k, v in self._frozen) + '})'
def __repr__(self) -> str:
return self.__class__.__name__ + '({' + ', '.join(f'{repr(k)}: {repr(v)}'for k, v in self._frozen) + '})'
def __eq__(self, other) -> bool:
if isinstance(other, Mapping):
try:
return all(other[k] == v for k, v in self._frozen)
except KeyError:
return False
else:
return False
def __hash__(self) -> int:
return hash(self._frozen)
def __contains__(self, item: Hashable) -> bool:
return item in self._frozen
def __getitem__(self, key: Hashable) -> Any:
if key in self._frozen:
return _value
else:
raise KeyError(key)
def __or__(self, other) -> 'frozenmap':
if isinstance(other, frozenmap):
return self.__class__(chain(self._frozen, other._frozen))
elif isinstance(other, dict):
return self.__class__(chain(self._frozen, other.items()))
else:
return NotImplemented
def __ror__(self, other) -> dict:
if isinstance(other, dict):
return dict(chain(self._frozen, other.items()))
else:
return NotImplemented
class frozenmap_keys:
__slots__ = '_frozen'
def __init__(self, frozen: frozenset[kv_pair]):
self._frozen = frozen
def __str__(self) -> str:
return self.__class__.__name__ + '([' + ', '.join(repr(item.key) for item in self._frozen) + '])'
def __repr__(self) -> str:
return self.__class__.__name__ + '([' + ', '.join(repr(item.key) for item in self._frozen) + '])'
def __iter__(self) -> Iterator[Hashable]:
return map(itemgetter(0), self._frozen)
class frozenmap_values:
__slots__ = '_frozen'
def __init__(self, frozen: frozenset[kv_pair]):
self._frozen = frozen
def __str__(self) -> str:
return self.__class__.__name__ + '([' + ', '.join(repr(item.value) for item in self._frozen) + '])'
def __repr__(self) -> str:
return self.__class__.__name__ + '([' + ', '.join(repr(item.value) for item in self._frozen) + '])'
def __iter__(self) -> Iterator:
return map(itemgetter(1), self._frozen)
class frozenmap_items:
__slots__ = '_frozen'
def __init__(self, frozen: frozenset[kv_pair]):
self._frozen = frozen
def __str__(self) -> str:
return self.__class__.__name__ + '([' + ', '.join(map(repr, self._frozen)) + '])'
def __repr__(self) -> str:
return self.__class__.__name__ + '([' + ', '.join(map(repr, self._frozen)) + '])'
def __iter__(self) -> Iterator[tuple[Hashable, Any]]:
return map(tuple, self._frozen)
Some test:
>>> a = {'123': '456', '789': 123}
>>> b = {'123': 456, 789: '123'}
>>> fa = frozenmap.from_dict(a)
>>> fb = frozenmap.from_dict(b)
>>> fa
frozenmap({'123': '456', '789': 123})
>>> fb
frozenmap({789: '123', '123': 456})
>>> fa.items()
frozenmap_items([('123', '456'), ('789', 123)])
>>> fa.keys()
frozenmap_keys(['123', '789'])
>>> fa.values()
frozenmap_values(['456', 123])
>>> a | b
{'123': 456, '789': 123, 789: '123'}
>>> fa | b
frozenmap({'789': 123, '123': '456', 789: '123'})
>>> fa | fb
frozenmap({'123': '456', 789: '123', '789': 123})
>>> a | fb
{'123': '456', 789: '123', '789': 123}
>>> a == fa
True
>>> a | fb == fa | fb
True
>>> fa['789']
123
>>> fb['123']
456
>>> ddict = {fa: a, fb: b}
>>> ddict
{frozenmap({'789': 123, '123': '456'}): {'123': '456', '789': 123}, frozenmap({789: '123', '123': 456}): {'123': 456, 789: '123'}}
>>> ddict[fa]
{'123': '456', '789': 123}
The implementation of the whole class relies on storing the key value pairs into the frozenset
, and only judging the equality of the two key value pairs through the key.
But the implementation of __getitem__
method is not good, the key to implementing is to save the value of kv_pair
in a global variable when a key is compared with it, and then retrieve it from the global variable:
class kv_pair:
...
def __eq__(self, other):
if isinstance(other, tuple):
return self.key == other[0]
elif isinstance(other, Hashable):
global _value
_value = self.value
return self.key == other
else:
return NotImplemented
...
class frozenmap(Mapping):
...
def __getitem__(self, key: Hashable) -> Any:
if key in self._frozen:
return _value
else:
raise KeyError(key)
...
Although this should not be a problem in single threaded programs, this implementation makes me very uncomfortable. The root of the problem is that I can't get the reference of the element in the frozenset
according to the key from frozenset
. I haven't found a way to get it. Is it possible to get it? Or, is there a better way to implement frozenmap
? I found people always replace it with sorted tuple from Stack Overflow, but this will lose the advantage of dictionary O(1) time looking up elements.
-
2\$\begingroup\$ Without a concrete application I think this question is too abstract. \$\endgroup\$Reinderien– Reinderien2022年05月19日 13:58:01 +00:00Commented May 19, 2022 at 13:58
-
1\$\begingroup\$ Can you please post the unit test code as well? \$\endgroup\$pacmaninbw– pacmaninbw ♦2022年05月19日 14:49:42 +00:00Commented May 19, 2022 at 14:49
-
5\$\begingroup\$ I'm baffled that some people think this question is too abstract. The OP is writing library code with obvious use cases. No one here would complain about the absence of a "concrete application" if someone asked for a review of a BFS function, a merge sort, or a variety of other reinvent-the-wheel questions that people use to build their programming skill -- again, because such examples are obvious library code. \$\endgroup\$FMc– FMc2022年05月19日 17:21:33 +00:00Commented May 19, 2022 at 17:21
1 Answer 1
Library code cannot resort to global variable hacks. You are attempting to implement a data structure. That's a bedrock kind of library code. As such, it cannot resort to crazy mechanisms like setting a global variable as a sneaky workaround for a situation where you need to recover a key-value linkage that was severed during class instantiation.
When you find yourself tempted to do something crazy, stop and do some research. Python doesn't have a frozenmap, but it's not a new idea. PEP 416 explored the idea as a formal proposal. ActiveState has a code recipe for a frozendict. There are third-party frozendict implementations. Those links are fairly easy to read, understand, and learn from. In particular, the PEP contains links to a variety of other implementations worth looking at. While I explored the topic briefly, I also came across an idea for an immutable dict based on Python's new-ish MappingProxyType.
Unhelpful type proliferation: kv_pair, frozenmap_keys, frozenmap_values,
frozenmap_items. None of these types seem to help very much. Remember: a
frozenmap is immutable. Given that, a tuple would be a perfectly acceptable way
to represent its keys. Ditto for its values. And its items could just be a
tuple of tuples. As for kv_pair
, it's not doing anything useful other than
supporting the global variable hack via its unusual implementation of
__eq__()
.
You don't need to destroy the underlying key-value mappings. You can receive the inputs, build a dict, tuck that dict away as a quasi-private attribute, and then implement the rest of the needed behavior: hashing, iteration, lookup, raising on attempts to mutate, and so forth. Some of the implementations noted above achieve such things by inheriting from dict, disabling methods that mutate, and implementing a hashing approach. Others do it by building a class from the ground up with the desired dict API. Either way, you can still use a Python dict behind the scenes for underlying data storage. Alternatively, if you really want to go hardcore, implement your own hashmap for storage. Either way, you'll be using something dict-like (rather than destroying the relationships and then hacking your way back to recover lost linkages).