1 | /* hash - hashing table processing.
|
---|
2 | Copyright (C) 1998-1999, 2001, 2003, 2009-2021 Free Software Foundation,
|
---|
3 | Inc.
|
---|
4 | Written by Jim Meyering <[email protected]>, 1998.
|
---|
5 |
|
---|
6 | This file is free software: you can redistribute it and/or modify
|
---|
7 | it under the terms of the GNU Lesser General Public License as
|
---|
8 | published by the Free Software Foundation; either version 2.1 of the
|
---|
9 | License, or (at your option) any later version.
|
---|
10 |
|
---|
11 | This file is distributed in the hope that it will be useful,
|
---|
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
14 | GNU Lesser General Public License for more details.
|
---|
15 |
|
---|
16 | You should have received a copy of the GNU Lesser General Public License
|
---|
17 | along with this program. If not, see <https://www.gnu.org/licenses/>. */
|
---|
18 |
|
---|
19 | /* A generic hash table package. */
|
---|
20 |
|
---|
21 | /* Make sure USE_OBSTACK is defined to 1 if you want the allocator to use
|
---|
22 | obstacks instead of malloc, and recompile 'hash.c' with same setting. */
|
---|
23 |
|
---|
24 | #ifndef HASH_H_
|
---|
25 | # define HASH_H_
|
---|
26 |
|
---|
27 | # include <stdio.h>
|
---|
28 | # include <stdbool.h>
|
---|
29 |
|
---|
30 | # ifdef __cplusplus
|
---|
31 | extern "C" {
|
---|
32 | # endif
|
---|
33 |
|
---|
34 | struct hash_tuning
|
---|
35 | {
|
---|
36 | /* This structure is mainly used for 'hash_initialize', see the block
|
---|
37 | documentation of 'hash_reset_tuning' for more complete comments. */
|
---|
38 |
|
---|
39 | float shrink_threshold; /* ratio of used buckets to trigger a shrink */
|
---|
40 | float shrink_factor; /* ratio of new smaller size to original size */
|
---|
41 | float growth_threshold; /* ratio of used buckets to trigger a growth */
|
---|
42 | float growth_factor; /* ratio of new bigger size to original size */
|
---|
43 | bool is_n_buckets; /* if CANDIDATE really means table size */
|
---|
44 | };
|
---|
45 |
|
---|
46 | typedef struct hash_tuning Hash_tuning;
|
---|
47 |
|
---|
48 | struct hash_table;
|
---|
49 |
|
---|
50 | typedef struct hash_table Hash_table;
|
---|
51 |
|
---|
52 | /*
|
---|
53 | * Information and lookup.
|
---|
54 | */
|
---|
55 |
|
---|
56 | /* The following few functions provide information about the overall hash
|
---|
57 | table organization: the number of entries, number of buckets and maximum
|
---|
58 | length of buckets. */
|
---|
59 |
|
---|
60 | /* Return the number of buckets in the hash table. The table size, the total
|
---|
61 | number of buckets (used plus unused), or the maximum number of slots, are
|
---|
62 | the same quantity. */
|
---|
63 | extern size_t hash_get_n_buckets (const Hash_table *table)
|
---|
64 | _GL_ATTRIBUTE_PURE;
|
---|
65 |
|
---|
66 | /* Return the number of slots in use (non-empty buckets). */
|
---|
67 | extern size_t hash_get_n_buckets_used (const Hash_table *table)
|
---|
68 | _GL_ATTRIBUTE_PURE;
|
---|
69 |
|
---|
70 | /* Return the number of active entries. */
|
---|
71 | extern size_t hash_get_n_entries (const Hash_table *table)
|
---|
72 | _GL_ATTRIBUTE_PURE;
|
---|
73 |
|
---|
74 | /* Return the length of the longest chain (bucket). */
|
---|
75 | extern size_t hash_get_max_bucket_length (const Hash_table *table)
|
---|
76 | _GL_ATTRIBUTE_PURE;
|
---|
77 |
|
---|
78 | /* Do a mild validation of a hash table, by traversing it and checking two
|
---|
79 | statistics. */
|
---|
80 | extern bool hash_table_ok (const Hash_table *table)
|
---|
81 | _GL_ATTRIBUTE_PURE;
|
---|
82 |
|
---|
83 | extern void hash_print_statistics (const Hash_table *table, FILE *stream);
|
---|
84 |
|
---|
85 | /* If ENTRY matches an entry already in the hash table, return the
|
---|
86 | entry from the table. Otherwise, return NULL. */
|
---|
87 | extern void *hash_lookup (const Hash_table *table, const void *entry);
|
---|
88 |
|
---|
89 | /*
|
---|
90 | * Walking.
|
---|
91 | */
|
---|
92 |
|
---|
93 | /* The functions in this page traverse the hash table and process the
|
---|
94 | contained entries. For the traversal to work properly, the hash table
|
---|
95 | should not be resized nor modified while any particular entry is being
|
---|
96 | processed. In particular, entries should not be added, and an entry
|
---|
97 | may be removed only if there is no shrink threshold and the entry being
|
---|
98 | removed has already been passed to hash_get_next. */
|
---|
99 |
|
---|
100 | /* Return the first data in the table, or NULL if the table is empty. */
|
---|
101 | extern void *hash_get_first (const Hash_table *table)
|
---|
102 | _GL_ATTRIBUTE_PURE;
|
---|
103 |
|
---|
104 | /* Return the user data for the entry following ENTRY, where ENTRY has been
|
---|
105 | returned by a previous call to either 'hash_get_first' or 'hash_get_next'.
|
---|
106 | Return NULL if there are no more entries. */
|
---|
107 | extern void *hash_get_next (const Hash_table *table, const void *entry);
|
---|
108 |
|
---|
109 | /* Fill BUFFER with pointers to active user entries in the hash table, then
|
---|
110 | return the number of pointers copied. Do not copy more than BUFFER_SIZE
|
---|
111 | pointers. */
|
---|
112 | extern size_t hash_get_entries (const Hash_table *table, void **buffer,
|
---|
113 | size_t buffer_size);
|
---|
114 |
|
---|
115 | typedef bool (*Hash_processor) (void *entry, void *processor_data);
|
---|
116 |
|
---|
117 | /* Call a PROCESSOR function for each entry of a hash table, and return the
|
---|
118 | number of entries for which the processor function returned success. A
|
---|
119 | pointer to some PROCESSOR_DATA which will be made available to each call to
|
---|
120 | the processor function. The PROCESSOR accepts two arguments: the first is
|
---|
121 | the user entry being walked into, the second is the value of PROCESSOR_DATA
|
---|
122 | as received. The walking continue for as long as the PROCESSOR function
|
---|
123 | returns nonzero. When it returns zero, the walking is interrupted. */
|
---|
124 | extern size_t hash_do_for_each (const Hash_table *table,
|
---|
125 | Hash_processor processor, void *processor_data);
|
---|
126 |
|
---|
127 | /*
|
---|
128 | * Allocation and clean-up.
|
---|
129 | */
|
---|
130 |
|
---|
131 | /* Return a hash index for a NUL-terminated STRING between 0 and N_BUCKETS-1.
|
---|
132 | This is a convenience routine for constructing other hashing functions. */
|
---|
133 | extern size_t hash_string (const char *string, size_t n_buckets)
|
---|
134 | _GL_ATTRIBUTE_PURE;
|
---|
135 |
|
---|
136 | extern void hash_reset_tuning (Hash_tuning *tuning);
|
---|
137 |
|
---|
138 | typedef size_t (*Hash_hasher) (const void *entry, size_t table_size);
|
---|
139 | typedef bool (*Hash_comparator) (const void *entry1, const void *entry2);
|
---|
140 | typedef void (*Hash_data_freer) (void *entry);
|
---|
141 |
|
---|
142 | /* Reclaim all storage associated with a hash table. If a data_freer
|
---|
143 | function has been supplied by the user when the hash table was created,
|
---|
144 | this function applies it to the data of each entry before freeing that
|
---|
145 | entry. */
|
---|
146 | extern void hash_free (Hash_table *table);
|
---|
147 |
|
---|
148 | /* Allocate and return a new hash table, or NULL upon failure. The initial
|
---|
149 | number of buckets is automatically selected so as to _guarantee_ that you
|
---|
150 | may insert at least CANDIDATE different user entries before any growth of
|
---|
151 | the hash table size occurs. So, if have a reasonably tight a-priori upper
|
---|
152 | bound on the number of entries you intend to insert in the hash table, you
|
---|
153 | may save some table memory and insertion time, by specifying it here. If
|
---|
154 | the IS_N_BUCKETS field of the TUNING structure is true, the CANDIDATE
|
---|
155 | argument has its meaning changed to the wanted number of buckets.
|
---|
156 |
|
---|
157 | TUNING points to a structure of user-supplied values, in case some fine
|
---|
158 | tuning is wanted over the default behavior of the hasher. If TUNING is
|
---|
159 | NULL, the default tuning parameters are used instead. If TUNING is
|
---|
160 | provided but the values requested are out of bounds or might cause
|
---|
161 | rounding errors, return NULL.
|
---|
162 |
|
---|
163 | The user-supplied HASHER function, when not NULL, accepts two
|
---|
164 | arguments ENTRY and TABLE_SIZE. It computes, by hashing ENTRY contents, a
|
---|
165 | slot number for that entry which should be in the range 0..TABLE_SIZE-1.
|
---|
166 | This slot number is then returned.
|
---|
167 |
|
---|
168 | The user-supplied COMPARATOR function, when not NULL, accepts two
|
---|
169 | arguments pointing to user data, it then returns true for a pair of entries
|
---|
170 | that compare equal, or false otherwise. This function is internally called
|
---|
171 | on entries which are already known to hash to the same bucket index,
|
---|
172 | but which are distinct pointers.
|
---|
173 |
|
---|
174 | The user-supplied DATA_FREER function, when not NULL, may be later called
|
---|
175 | with the user data as an argument, just before the entry containing the
|
---|
176 | data gets freed. This happens from within 'hash_free' or 'hash_clear'.
|
---|
177 | You should specify this function only if you want these functions to free
|
---|
178 | all of your 'data' data. This is typically the case when your data is
|
---|
179 | simply an auxiliary struct that you have malloc'd to aggregate several
|
---|
180 | values. */
|
---|
181 | _GL_ATTRIBUTE_NODISCARD
|
---|
182 | extern Hash_table *hash_initialize (size_t candidate,
|
---|
183 | const Hash_tuning *tuning,
|
---|
184 | Hash_hasher hasher,
|
---|
185 | Hash_comparator comparator,
|
---|
186 | Hash_data_freer data_freer)
|
---|
187 | _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC (hash_free, 1);
|
---|
188 |
|
---|
189 | /* Same as hash_initialize, but invokes xalloc_die on memory exhaustion. */
|
---|
190 | /* This function is defined by module 'xhash'. */
|
---|
191 | _GL_ATTRIBUTE_NODISCARD
|
---|
192 | extern Hash_table *hash_xinitialize (size_t candidate,
|
---|
193 | const Hash_tuning *tuning,
|
---|
194 | Hash_hasher hasher,
|
---|
195 | Hash_comparator comparator,
|
---|
196 | Hash_data_freer data_freer)
|
---|
197 | _GL_ATTRIBUTE_MALLOC _GL_ATTRIBUTE_DEALLOC (hash_free, 1)
|
---|
198 | _GL_ATTRIBUTE_RETURNS_NONNULL;
|
---|
199 |
|
---|
200 | /* Make all buckets empty, placing any chained entries on the free list.
|
---|
201 | Apply the user-specified function data_freer (if any) to the datas of any
|
---|
202 | affected entries. */
|
---|
203 | extern void hash_clear (Hash_table *table);
|
---|
204 |
|
---|
205 | /*
|
---|
206 | * Insertion and deletion.
|
---|
207 | */
|
---|
208 |
|
---|
209 | /* For an already existing hash table, change the number of buckets through
|
---|
210 | specifying CANDIDATE. The contents of the hash table are preserved. The
|
---|
211 | new number of buckets is automatically selected so as to _guarantee_ that
|
---|
212 | the table may receive at least CANDIDATE different user entries, including
|
---|
213 | those already in the table, before any other growth of the hash table size
|
---|
214 | occurs. If TUNING->IS_N_BUCKETS is true, then CANDIDATE specifies the
|
---|
215 | exact number of buckets desired. Return true iff the rehash succeeded. */
|
---|
216 | _GL_ATTRIBUTE_NODISCARD
|
---|
217 | extern bool hash_rehash (Hash_table *table, size_t candidate);
|
---|
218 |
|
---|
219 | /* If ENTRY matches an entry already in the hash table, return the pointer
|
---|
220 | to the entry from the table. Otherwise, insert ENTRY and return ENTRY.
|
---|
221 | Return NULL if the storage required for insertion cannot be allocated.
|
---|
222 | This implementation does not support duplicate entries or insertion of
|
---|
223 | NULL. */
|
---|
224 | _GL_ATTRIBUTE_NODISCARD
|
---|
225 | extern void *hash_insert (Hash_table *table, const void *entry);
|
---|
226 |
|
---|
227 | /* Same as hash_insert, but invokes xalloc_die on memory exhaustion. */
|
---|
228 | /* This function is defined by module 'xhash'. */
|
---|
229 | extern void *hash_xinsert (Hash_table *table, const void *entry);
|
---|
230 |
|
---|
231 | /* Insert ENTRY into hash TABLE if there is not already a matching entry.
|
---|
232 |
|
---|
233 | Return -1 upon memory allocation failure.
|
---|
234 | Return 1 if insertion succeeded.
|
---|
235 | Return 0 if there is already a matching entry in the table,
|
---|
236 | and in that case, if MATCHED_ENT is non-NULL, set *MATCHED_ENT
|
---|
237 | to that entry.
|
---|
238 |
|
---|
239 | This interface is easier to use than hash_insert when you must
|
---|
240 | distinguish between the latter two cases. More importantly,
|
---|
241 | hash_insert is unusable for some types of ENTRY values. When using
|
---|
242 | hash_insert, the only way to distinguish those cases is to compare
|
---|
243 | the return value and ENTRY. That works only when you can have two
|
---|
244 | different ENTRY values that point to data that compares "equal". Thus,
|
---|
245 | when the ENTRY value is a simple scalar, you must use
|
---|
246 | hash_insert_if_absent. ENTRY must not be NULL. */
|
---|
247 | extern int hash_insert_if_absent (Hash_table *table, const void *entry,
|
---|
248 | const void **matched_ent);
|
---|
249 |
|
---|
250 | /* If ENTRY is already in the table, remove it and return the just-deleted
|
---|
251 | data (the user may want to deallocate its storage). If ENTRY is not in the
|
---|
252 | table, don't modify the table and return NULL. */
|
---|
253 | extern void *hash_remove (Hash_table *table, const void *entry);
|
---|
254 |
|
---|
255 | /* Same as hash_remove. This interface is deprecated.
|
---|
256 | FIXME: Remove in 2022. */
|
---|
257 | _GL_ATTRIBUTE_DEPRECATED
|
---|
258 | extern void *hash_delete (Hash_table *table, const void *entry);
|
---|
259 |
|
---|
260 | # ifdef __cplusplus
|
---|
261 | }
|
---|
262 | # endif
|
---|
263 |
|
---|
264 | #endif
|
---|