/* PSPP - a program for statistical analysis. Copyright (C) 2008, 2009, 2010, 2011 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ /* Hash table with separate chaining. This header (hmapx.h) supplies an "external" implementation of a hash table that uses linked lists to resolve collisions ("separate chaining"). Its companion header (hmap.h) supplies a "embedded" implementation that is otherwise similar. The two variants are described briefly here. The external variant, for which this is the header, is described in slightly more detail below. Each function also has a detailed usage comment at its point of definition. (Many of those definitions are inline in this file, because they are so simple. Others are in hmapx.c.) The "hmap" embedded hash table implementation puts the hash table node (which includes the linked list used for resolving collisions) within the data structure that the hash table contains. This makes allocation efficient, in space and time, because no additional call into an allocator is needed to obtain memory for the hash table node. It also makes it easy to find the hash table node associated with a given object. However, it's difficult to include a given object in an arbitrary number of hash tables. The "hmapx" external hash table implementation allocates hash table nodes separately from the objects in the hash table. Inserting and removing hash table elements requires dynamic allocation, so it is normally slower and takes more memory than the embedded implementation. It also requires searching the table to find the node associated with a given object. However, it's easy to include a given object in an arbitrary number of hash tables. It's also possible to create an external hash table without adding a member to the data structure that the hash table contains. */ #ifndef LIBPSPP_HMAPX_H #define LIBPSPP_HMAPX_H 1 /* External hash table with separate chaining. To create an external hash table, declare an instance of struct hmapx, then initialize it with hmapx_init(): struct hmapx map; hmapx_init (&map); or, alternatively: struct hmapx map = HMAPX_INITIALIZER (map); An hmapx data structure contains data represented as void *. The hmapx_insert() function inserts such a datum and returns the address of a newly created struct hmapx_node that represents the new element: struct foo { const char *key; const char *value; }; struct foo foo = {"key", "value"}; struct hmapx_node *node; node = hmapx_insert (&map, &foo, hsh_hash_string (foo.key)); The element's hash value must be passed as one of hmapx_insert()'s arguments. The hash table saves this hash value for use later to speed searches and to rehash as the hash table grows. hmapx_insert() does not check whether the newly inserted element duplicates an element already in the hash table. The client is responsible for doing so, if this is desirable. Use hmapx_delete() to delete an element from the hash table, passing in its hmapx_node: hmapx_delete (&map, node); Deleting an element frees its node. The hash table does not provide a direct way to search for an existing element. Instead, it provides the means to iterate over all the elements in the hash table with a given hash value. It is easy to compose a search function from such a building block. For example: struct hmapx_node * find_node (const struct hmapx *map, const char *target) { struct hmapx_node *node; struct foo *foo; HMAPX_FOR_EACH_WITH_HASH (foo, node, hsh_hash_string (target), map) if (!strcmp (foo->key, target)) break; return node; } This function's client can extract the data item from the returned hmapx_node using the hmapx_node_data() function. The hmapx_node can also be useful directly as an argument to other hmapx functions, such as hmapx_delete(). Here is how to iterate through the elements currently in the hash table: struct hmapx_node *node; const char *string; HMAPX_FOR_EACH (data, node, &map) { ...do something with string... } */ #include "libpspp/hmap.h" #include /* Hash table node. */ struct hmapx_node { struct hmap_node hmap_node; /* Underlying hash node. */ void *data; /* User data. */ }; static inline void *hmapx_node_data (const struct hmapx_node *); static inline size_t hmapx_node_hash (const struct hmapx_node *); /* Hash table. */ struct hmapx { struct hmap hmap; }; /* Suitable for use as the initializer for a struct hmapx named MAP. Typical usage: struct hmap map = HMAPX_INITIALIZER (map); HMAPX_INITIALIZER() is an alternative to hmapx_init(). */ #define HMAPX_INITIALIZER(MAP) { HMAP_INITIALIZER (MAP.hmap) } /* Creation and destruction. */ static inline void hmapx_init (struct hmapx *); static inline void hmapx_swap (struct hmapx *, struct hmapx *); void hmapx_clear (struct hmapx *); void hmapx_destroy (struct hmapx *); /* Storage management. */ static inline void hmapx_reserve (struct hmapx *, size_t capacity); static inline void hmapx_shrink (struct hmapx *); /* Search. */ static inline struct hmapx_node *hmapx_first_with_hash (struct hmapx *, size_t hash); static inline struct hmapx_node *hmapx_next_with_hash (struct hmapx_node *); /* Insertion and deletion. */ struct hmapx_node *hmapx_insert (struct hmapx *, void *, size_t hash); struct hmapx_node *hmapx_insert_fast (struct hmapx *, void *, size_t hash); static inline void hmapx_delete (struct hmapx *, struct hmapx_node *); /* Iteration. */ static inline struct hmapx_node *hmapx_first (const struct hmapx *); static inline struct hmapx_node *hmapx_next (const struct hmapx *, const struct hmapx_node *); /* Counting. */ static inline bool hmapx_is_empty (const struct hmapx *); static inline size_t hmapx_count (const struct hmapx *); static inline size_t hmapx_capacity (const struct hmapx *); /* Updating data elements. */ static inline void hmapx_change (struct hmapx *, struct hmapx_node *, void *, size_t new_hash); static inline void hmapx_changed (struct hmapx *, struct hmapx_node *, size_t new_hash); static inline void hmapx_move (struct hmapx_node *, void *); /* Convenience macros for search. These macros automatically use hmapx_node_data() to obtain the data elements that encapsulate hmap nodes, which often saves typing and can make code easier to read. Refer to the large comment near the top of this file for an example. These macros evaluate HASH only once. They evaluate their other arguments many times. */ #define HMAPX_FOR_EACH_WITH_HASH(DATA, NODE, HASH, HMAPX) \ for ((NODE) = hmapx_first_with_hash (HMAPX, HASH); \ (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0; \ (NODE) = hmapx_next_with_hash (NODE)) #define HMAPX_FOR_EACH_WITH_HASH_SAFE(DATA, NODE, NEXT, HASH, HMAPX) \ for ((NODE) = hmapx_first_with_hash (HMAPX, HASH); \ ((NODE) != NULL \ ? ((DATA) = hmapx_node_data (NODE), \ (NEXT) = hmapx_next_with_hash (NODE), \ 1) \ : 0); \ (NODE) = (NEXT)) /* Convenience macros for iteration. These macros automatically use hmapx_node_data() to obtain the data elements that encapsulate hmap nodes, which often saves typing and can make code easier to read. Refer to the large comment near the top of this file for an example. These macros evaluate their arguments many times. */ #define HMAPX_FOR_EACH(DATA, NODE, HMAPX) \ for ((NODE) = hmapx_first (HMAPX); \ (NODE) != NULL ? ((DATA) = hmapx_node_data (NODE), 1) : 0; \ (NODE) = hmapx_next (HMAPX, NODE)) #define HMAPX_FOR_EACH_SAFE(DATA, NODE, NEXT, HMAPX) \ for ((NODE) = hmapx_first (HMAPX); \ ((NODE) != NULL \ ? ((DATA) = hmapx_node_data (NODE), \ (NEXT) = hmapx_next (HMAPX, NODE), \ 1) \ : 0); \ (NODE) = (NEXT)) /* Inline definitions. */ /* Returns the data stored in NODE. */ static inline void * hmapx_node_data (const struct hmapx_node *node) { return node->data; } /* Returns the hash value stored in NODE */ static inline size_t hmapx_node_hash (const struct hmapx_node *node) { return hmap_node_hash (&node->hmap_node); } /* Initializes MAP as a new hash map that is initially empty. */ static inline void hmapx_init (struct hmapx *map) { hmap_init (&map->hmap); } /* Exchanges the contents of hash maps A and B. */ static inline void hmapx_swap (struct hmapx *a, struct hmapx *b) { hmap_swap (&a->hmap, &b->hmap); } /* Ensures that MAP has sufficient space to store at least CAPACITY data elements, allocating a new set of buckets and rehashing if necessary. */ static inline void hmapx_reserve (struct hmapx *map, size_t capacity) { hmap_reserve (&map->hmap, capacity); } /* Shrinks MAP's set of buckets to the minimum number needed to store its current number of elements, allocating a new set of buckets and rehashing if that would save space. */ static inline void hmapx_shrink (struct hmapx *map) { hmap_shrink (&map->hmap); } /* Returns the first node in MAP that has hash value HASH, or a null pointer if MAP does not contain any node with that hash value. Assuming uniform hashing and no duplicate data items in MAP, this function runs in constant time. (Amortized over an iteration over all data items with a given HASH, its runtime is proportional to the length of the hash chain for HASH, so given a pathological hash function, e.g. one that returns a constant value, its runtime degenerates to linear in the length of NODE's hash chain.) Nodes are returned in arbitrary order that may change whenever the hash table's current capacity changes, as reported by hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(), and hmapx_shrink() can change the capacity of a hash map. Inserting a node with hmapx_insert_fast() or deleting one with hmapx_delete() will not change the relative ordering of nodes. The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE macros provide convenient ways to iterate over all the nodes with a given hash. */ static inline struct hmapx_node * hmapx_first_with_hash (struct hmapx *map, size_t hash) { return HMAP_FIRST_WITH_HASH (struct hmapx_node, hmap_node, &map->hmap, hash); } /* Returns the next node in MAP after NODE that has the same hash value as NODE, or a null pointer if MAP does not contain any more nodes with that hash value. Assuming uniform hashing and no duplicate data items in MAP, this function runs in constant time. (Amortized over an iteration over all data items with a given HASH, its runtime is proportional to the length of the hash chain for HASH, so given a pathological hash function, e.g. one that returns a constant value, its runtime degenerates to linear in the length of NODE's hash chain.) Nodes are returned in arbitrary order that may change whenever the hash table's current capacity changes, as reported by hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(), and hmapx_shrink() can change the capacity of a hash map. Inserting a node with hmapx_insert_fast() or deleting one with hmapx_delete() will not change the relative ordering of nodes. The HMAPX_FOR_EACH_WITH_HASH and HMAPX_FOR_EACH_WITH_HASH_SAFE macros provide convenient ways to iterate over all the nodes with a given hash. */ static inline struct hmapx_node * hmapx_next_with_hash (struct hmapx_node *node) { return HMAP_NEXT_WITH_HASH (node, struct hmapx_node, hmap_node); } /* Removes NODE from MAP and frees NODE. The client is responsible for freeing the user data associated with NODE, if appropriate. Assuming uniform hashing, this function runs in constant time. (Its runtime is proportional to the position of NODE in its hash chain, so given a pathological hash function, e.g. one that returns a constant value, its runtime degenerates to linear in the length of NODE's hash chain.) This function never reduces the number of buckets in MAP. When one deletes a large number of nodes from a hash table, calling hmapx_shrink() afterward may therefore save a small amount of memory. It is also more expensive to iterate through a very sparse hash table than a denser one, so shrinking the hash table could also save some time. However, rehashing has an immediate cost that must be weighed against these benefits. hmapx_delete() does not change NODE's hash value reported by hmapx_node_hash(). */ static inline void hmapx_delete (struct hmapx *map, struct hmapx_node *node) { hmap_delete (&map->hmap, &node->hmap_node); free (node); } /* Returns the first node in MAP, or a null pointer if MAP is empty. Amortized over iterating through every data element in MAP, this function runs in constant time. However, this assumes that MAP is not excessively sparse, that is, that hmapx_capacity(MAP) is at most a constant factor greater than hmapx_count(MAP). This will always be true unless many nodes have been inserted into MAP and then most or all of them deleted; in such a case, calling hmapx_shrink() is advised. Nodes are returned in arbitrary order that may change whenever the hash table's current capacity changes, as reported by hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(), and hmapx_shrink() can change the capacity of a hash map. Inserting a node with hmapx_insert_fast() or deleting one with hmapx_delete() will not change the relative ordering of nodes. The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide convenient ways to iterate over all the nodes in a hash map. */ static inline struct hmapx_node * hmapx_first (const struct hmapx *map) { return HMAP_FIRST (struct hmapx_node, hmap_node, &map->hmap); } /* Returns the next node in MAP following NODE, or a null pointer if NODE is the last node in MAP. Amortized over iterating through every data element in MAP, this function runs in constant time. However, this assumes that MAP is not excessively sparse, that is, that hmapx_capacity(MAP) is at most a constant factor greater than hmapx_count(MAP). This will always be true unless many nodes have been inserted into MAP and then most or all of them deleted; in such a case, calling hmapx_shrink() is advised. Nodes are returned in arbitrary order that may change whenever the hash table's current capacity changes, as reported by hmapx_capacity(). Calls to hmapx_insert(), hmapx_reserve(), and hmapx_shrink() can change the capacity of a hash map. Inserting a node with hmapx_insert_fast() or deleting one with hmapx_delete() will not change the relative ordering of nodes. The HMAPX_FOR_EACH and HMAPX_FOR_EACH_SAFE macros provide convenient ways to iterate over all the nodes in a hash map. */ static inline struct hmapx_node * hmapx_next (const struct hmapx *map, const struct hmapx_node *node) { return HMAP_NEXT (node, struct hmapx_node, hmap_node, &map->hmap); } /* Returns true if MAP currently contains no data items, false otherwise. */ static inline bool hmapx_is_empty (const struct hmapx *map) { return hmap_is_empty (&map->hmap); } /* Returns the number of data items currently in MAP. */ static inline size_t hmapx_count (const struct hmapx *map) { return hmap_count (&map->hmap); } /* Returns the current capacity of MAP, that is, the maximum number of data elements that MAP may hold before it becomes advisable to rehash. The capacity is advisory only: it is possible to insert any number of data elements into a hash map regardless of its capacity. However, inserting many more elements than the map's capacity will degrade search performance. */ static inline size_t hmapx_capacity (const struct hmapx *map) { return hmap_capacity (&map->hmap); } /* Changes NODE's data to DATA and its hash value to NEW_HASH. NODE must reside in MAP. This function does not verify that MAP does not already contain a data item that duplicates DATA. If duplicates should be disallowed (which is the usual case), then the client must check for duplicates before changing NODE's value. */ static inline void hmapx_change (struct hmapx *map, struct hmapx_node *node, void *data, size_t new_hash) { hmapx_move (node, data); hmapx_changed (map, node, new_hash); } /* Moves NODE around in MAP to compensate for its hash value having changed to NEW_HASH. This function does not verify that MAP does not already contain a data item that duplicates the new value of NODE's data. If duplicates should be disallowed (which is the usual case), then the client must check for duplicates before changing NODE's value. */ static inline void hmapx_changed (struct hmapx *map, struct hmapx_node *node, size_t new_hash) { hmap_changed (&map->hmap, &node->hmap_node, new_hash); } /* Updates NODE to compensate for its data item having moved around in memory to new location DATA. The data item's value and hash value should not have changed. (If they have changed, call hmapx_change() instead.) */ static inline void hmapx_move (struct hmapx_node *node, void *data) { node->data = data; } #endif /* libpspp/hmapx.h */

AltStyle によって変換されたページ (->オリジナル) /