VirtualBox

source: kBuild/vendor/grep/3.7/lib/hash.c@3576

Last change on this file since 3576 was 3529, checked in by bird, 3 years ago

Imported grep 3.7 from grep-3.7.tar.gz (sha256: c22b0cf2d4f6bbe599c902387e8058990e1eee99aef333a203829e5fd3dbb342), applying minimal auto-props.

  • Property svn:eol-style set to native
File size: 32.1 KB
/* hash - hashing table processing.

   Copyright (C) 1998-2004, 2006-2007, 2009-2021 Free Software Foundation, Inc.

   Written by Jim Meyering, 1992.

   This file is free software: you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as
   published by the Free Software Foundation; either version 2.1 of the
   License, or (at your option) any later version.

   This file is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program. If not, see <https://www.gnu.org/licenses/>. */

/* A generic hash table package. */

/* Define USE_OBSTACK to 1 if you want the allocator to use obstacks instead
   of malloc. If you change USE_OBSTACK, you have to recompile! */

#include <config.h>

#include "hash.h"

#include "bitrotate.h"
#include "xalloc-oversized.h"

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#if USE_OBSTACK
# include "obstack.h"
# ifndef obstack_chunk_alloc
#  define obstack_chunk_alloc malloc
# endif
# ifndef obstack_chunk_free
#  define obstack_chunk_free free
# endif
#endif

struct hash_entry
  {
    void *data;
    struct hash_entry *next;
  };

struct hash_table
  {
    /* The array of buckets starts at BUCKET and extends to BUCKET_LIMIT-1,
       for a possibility of N_BUCKETS. Among those, N_BUCKETS_USED buckets
       are not empty, there are N_ENTRIES active entries in the table. */
    struct hash_entry *bucket;
    struct hash_entry const *bucket_limit;
    size_t n_buckets;
    size_t n_buckets_used;
    size_t n_entries;

    /* Tuning arguments, kept in a physically separate structure. */
    const Hash_tuning *tuning;

    /* Three functions are given to 'hash_initialize', see the documentation
       block for this function. In a word, HASHER randomizes a user entry
       into a number up from 0 up to some maximum minus 1; COMPARATOR returns
       true if two user entries compare equally; and DATA_FREER is the cleanup
       function for a user entry. */
    Hash_hasher hasher;
    Hash_comparator comparator;
    Hash_data_freer data_freer;

    /* A linked list of freed struct hash_entry structs. */
    struct hash_entry *free_entry_list;

#if USE_OBSTACK
    /* Whenever obstacks are used, it is possible to allocate all overflowed
       entries into a single stack, so they all can be freed in a single
       operation. It is not clear if the speedup is worth the trouble. */
    struct obstack entry_stack;
#endif
  };

/* A hash table contains many internal entries, each holding a pointer to
   some user-provided data (also called a user entry). An entry indistinctly
   refers to both the internal entry and its associated user entry. A user
   entry contents may be hashed by a randomization function (the hashing
   function, or just "hasher" for short) into a number (or "slot") between 0
   and the current table size. At each slot position in the hash table,
   starts a linked chain of entries for which the user data all hash to this
   slot. A bucket is the collection of all entries hashing to the same slot.

   A good "hasher" function will distribute entries rather evenly in buckets.
   In the ideal case, the length of each bucket is roughly the number of
   entries divided by the table size. Finding the slot for a data is usually
   done in constant time by the "hasher", and the later finding of a precise
   entry is linear in time with the size of the bucket. Consequently, a
   larger hash table size (that is, a larger number of buckets) is prone to
   yielding shorter chains, *given* the "hasher" function behaves properly.

   Long buckets slow down the lookup algorithm. One might use big hash table
   sizes in hope to reduce the average length of buckets, but this might
   become inordinate, as unused slots in the hash table take some space. The
   best bet is to make sure you are using a good "hasher" function (beware
   that those are not that easy to write! :-), and to use a table size
   larger than the actual number of entries. */

/* If an insertion makes the ratio of nonempty buckets to table size larger
   than the growth threshold (a number between 0.0 and 1.0), then increase
   the table size by multiplying by the growth factor (a number greater than
   1.0). The growth threshold defaults to 0.8, and the growth factor
   defaults to 1.414, meaning that the table will have doubled its size
   every second time 80% of the buckets get used. */
#define DEFAULT_GROWTH_THRESHOLD 0.8f
#define DEFAULT_GROWTH_FACTOR 1.414f

/* If a deletion empties a bucket and causes the ratio of used buckets to
   table size to become smaller than the shrink threshold (a number between
   0.0 and 1.0), then shrink the table by multiplying by the shrink factor (a
   number greater than the shrink threshold but smaller than 1.0). The shrink
   threshold and factor default to 0.0 and 1.0, meaning that the table never
   shrinks. */
#define DEFAULT_SHRINK_THRESHOLD 0.0f
#define DEFAULT_SHRINK_FACTOR 1.0f

/* Use this to initialize or reset a TUNING structure to
   some sensible values. */
static const Hash_tuning default_tuning =
  {
    DEFAULT_SHRINK_THRESHOLD,
    DEFAULT_SHRINK_FACTOR,
    DEFAULT_GROWTH_THRESHOLD,
    DEFAULT_GROWTH_FACTOR,
    false
  };

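/* Example (illustrative sketch, not from the imported source): a caller that
   wants non-default tuning can fill in a Hash_tuning of its own and hand it
   to hash_initialize. The values shown are arbitrary but chosen to satisfy
   the validity checks in check_tuning further down; the structure must
   outlive the table, because hash_initialize stores only a pointer to it.
   With is_n_buckets false, the size given to hash_initialize is treated as
   an expected number of entries rather than a bucket count.

       static Hash_tuning my_tuning;

       hash_reset_tuning (&my_tuning);
       my_tuning.growth_threshold = 0.75f;   (grow once 75% of buckets used)
       my_tuning.growth_factor = 2.0f;       (double the bucket count)
       my_tuning.shrink_threshold = 0.2f;
       my_tuning.shrink_factor = 0.5f;
       my_tuning.is_n_buckets = false;

       Hash_table *ht = hash_initialize (100, &my_tuning, NULL, NULL, NULL);  */
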
/* Information and lookup. */

size_t
hash_get_n_buckets (const Hash_table *table)
{
  return table->n_buckets;
}

size_t
hash_get_n_buckets_used (const Hash_table *table)
{
  return table->n_buckets_used;
}

size_t
hash_get_n_entries (const Hash_table *table)
{
  return table->n_entries;
}

size_t
hash_get_max_bucket_length (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t max_bucket_length = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;
          size_t bucket_length = 1;

          while (cursor = cursor->next, cursor)
            bucket_length++;

          if (bucket_length > max_bucket_length)
            max_bucket_length = bucket_length;
        }
    }

  return max_bucket_length;
}

bool
hash_table_ok (const Hash_table *table)
{
  struct hash_entry const *bucket;
  size_t n_buckets_used = 0;
  size_t n_entries = 0;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry const *cursor = bucket;

          /* Count bucket head. */
          n_buckets_used++;
          n_entries++;

          /* Count bucket overflow. */
          while (cursor = cursor->next, cursor)
            n_entries++;
        }
    }

  if (n_buckets_used == table->n_buckets_used && n_entries == table->n_entries)
    return true;

  return false;
}

void
hash_print_statistics (const Hash_table *table, FILE *stream)
{
  size_t n_entries = hash_get_n_entries (table);
  size_t n_buckets = hash_get_n_buckets (table);
  size_t n_buckets_used = hash_get_n_buckets_used (table);
  size_t max_bucket_length = hash_get_max_bucket_length (table);

  fprintf (stream, "# entries: %lu\n", (unsigned long int) n_entries);
  fprintf (stream, "# buckets: %lu\n", (unsigned long int) n_buckets);
  fprintf (stream, "# buckets used: %lu (%.2f%%)\n",
           (unsigned long int) n_buckets_used,
           (100.0 * n_buckets_used) / n_buckets);
  fprintf (stream, "max bucket length: %lu\n",
           (unsigned long int) max_bucket_length);
}

/* Hash KEY and return a pointer to the selected bucket.
   If TABLE->hasher misbehaves, abort. */
static struct hash_entry *
safe_hasher (const Hash_table *table, const void *key)
{
  size_t n = table->hasher (key, table->n_buckets);
  if (! (n < table->n_buckets))
    abort ();
  return table->bucket + n;
}

void *
hash_lookup (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket = safe_hasher (table, entry);
  struct hash_entry const *cursor;

  if (bucket->data == NULL)
    return NULL;

  for (cursor = bucket; cursor; cursor = cursor->next)
    if (entry == cursor->data || table->comparator (entry, cursor->data))
      return cursor->data;

  return NULL;
}

/* Walking. */

void *
hash_get_first (const Hash_table *table)
{
  struct hash_entry const *bucket;

  if (table->n_entries == 0)
    return NULL;

  for (bucket = table->bucket; ; bucket++)
    if (! (bucket < table->bucket_limit))
      abort ();
    else if (bucket->data)
      return bucket->data;
}

void *
hash_get_next (const Hash_table *table, const void *entry)
{
  struct hash_entry const *bucket = safe_hasher (table, entry);
  struct hash_entry const *cursor;

  /* Find next entry in the same bucket. */
  cursor = bucket;
  do
    {
      if (cursor->data == entry && cursor->next)
        return cursor->next->data;
      cursor = cursor->next;
    }
  while (cursor != NULL);

  /* Find first entry in any subsequent bucket. */
  while (++bucket < table->bucket_limit)
    if (bucket->data)
      return bucket->data;

  /* None found. */
  return NULL;
}

size_t
hash_get_entries (const Hash_table *table, void **buffer,
                  size_t buffer_size)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (counter >= buffer_size)
                return counter;
              buffer[counter++] = cursor->data;
            }
        }
    }

  return counter;
}

size_t
hash_do_for_each (const Hash_table *table, Hash_processor processor,
                  void *processor_data)
{
  size_t counter = 0;
  struct hash_entry const *bucket;
  struct hash_entry const *cursor;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          for (cursor = bucket; cursor; cursor = cursor->next)
            {
              if (! processor (cursor->data, processor_data))
                return counter;
              counter++;
            }
        }
    }

  return counter;
}

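/* Example (illustrative sketch, not from the imported source): two ways a
   caller might visit every user entry, assuming the entries are C strings.
   The names print_entry and ht are hypothetical. A processor returns true
   to continue the walk and false to stop it early, as hash_do_for_each
   above implements.

       static bool
       print_entry (void *data, void *processor_data)
       {
         FILE *stream = processor_data;
         fprintf (stream, "%s\n", (char const *) data);
         return true;
       }

       hash_do_for_each (ht, print_entry, stdout);

       void *e;
       for (e = hash_get_first (ht); e != NULL; e = hash_get_next (ht, e))
         fprintf (stdout, "%s\n", (char const *) e);  */
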
/* Allocation and clean-up. */

#if USE_DIFF_HASH

/* About hashings, Paul Eggert writes to me (FP), on 1994-01-01: "Please see
   B. J. McKenzie, R. Harries & T. Bell, Selecting a hashing algorithm,
   Software--practice & experience 20, 2 (Feb 1990), 209-224. Good hash
   algorithms tend to be domain-specific, so what's good for [diffutils'] io.c
   may not be good for your application." */

size_t
hash_string (const char *string, size_t n_buckets)
{
# define HASH_ONE_CHAR(Value, Byte) \
  ((Byte) + rotl_sz (Value, 7))

  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = HASH_ONE_CHAR (value, ch);
  return value % n_buckets;

# undef HASH_ONE_CHAR
}

#else /* not USE_DIFF_HASH */

/* This one comes from 'recode', and performs a bit better than the above as
   per a few experiments. It is inspired from a hashing routine found in the
   very old Cyber 'snoop', itself written in typical Greg Mansfield style.
   (By the way, what happened to this excellent man? Is he still alive?) */

size_t
hash_string (const char *string, size_t n_buckets)
{
  size_t value = 0;
  unsigned char ch;

  for (; (ch = *string); string++)
    value = (value * 31 + ch) % n_buckets;
  return value;
}

#endif /* not USE_DIFF_HASH */

/* Return true if CANDIDATE is a prime number. CANDIDATE should be an odd
   number at least equal to 11. */

static bool _GL_ATTRIBUTE_CONST
is_prime (size_t candidate)
{
  size_t divisor = 3;
  size_t square = divisor * divisor;

  while (square < candidate && (candidate % divisor))
    {
      divisor++;
      square += 4 * divisor;
      divisor++;
    }

  return (candidate % divisor ? true : false);
}

/* Round a given CANDIDATE number up to the nearest prime, and return that
   prime. Primes lower than 10 are merely skipped. */

static size_t _GL_ATTRIBUTE_CONST
next_prime (size_t candidate)
{
  /* Skip small primes. */
  if (candidate < 10)
    candidate = 10;

  /* Make it definitely odd. */
  candidate |= 1;

  while (SIZE_MAX != candidate && !is_prime (candidate))
    candidate += 2;

  return candidate;
}

void
hash_reset_tuning (Hash_tuning *tuning)
{
  *tuning = default_tuning;
}

/* If the user passes a NULL hasher, we hash the raw pointer. */
static size_t
raw_hasher (const void *data, size_t n)
{
  /* When hashing unique pointers, it is often the case that they were
     generated by malloc and thus have the property that the low-order
     bits are 0. As this tends to give poorer performance with small
     tables, we rotate the pointer value before performing division,
     in an attempt to improve hash quality. */
  size_t val = rotr_sz ((size_t) data, 3);
  return val % n;
}

/* If the user passes a NULL comparator, we use pointer comparison. */
static bool
raw_comparator (const void *a, const void *b)
{
  return a == b;
}


/* For the given hash TABLE, check the user supplied tuning structure for
   reasonable values, and return true if there is no gross error with it.
   Otherwise, definitively reset the TUNING field to some acceptable default
   in the hash table (that is, the user loses the right of further modifying
   tuning arguments), and return false. */

static bool
check_tuning (Hash_table *table)
{
  const Hash_tuning *tuning = table->tuning;
  float epsilon;
  if (tuning == &default_tuning)
    return true;

  /* Be a bit stricter than mathematics would require, so that
     rounding errors in size calculations do not cause allocations to
     fail to grow or shrink as they should. The smallest allocation
     is 11 (due to next_prime's algorithm), so an epsilon of 0.1
     should be good enough. */
  epsilon = 0.1f;

  if (epsilon < tuning->growth_threshold
      && tuning->growth_threshold < 1 - epsilon
      && 1 + epsilon < tuning->growth_factor
      && 0 <= tuning->shrink_threshold
      && tuning->shrink_threshold + epsilon < tuning->shrink_factor
      && tuning->shrink_factor <= 1
      && tuning->shrink_threshold + epsilon < tuning->growth_threshold)
    return true;

  table->tuning = &default_tuning;
  return false;
}

/* Compute the size of the bucket array for the given CANDIDATE and
   TUNING, or return 0 if there is no possible way to allocate that
   many entries. */

static size_t _GL_ATTRIBUTE_PURE
compute_bucket_size (size_t candidate, const Hash_tuning *tuning)
{
  if (!tuning->is_n_buckets)
    {
      float new_candidate = candidate / tuning->growth_threshold;
      if ((float) SIZE_MAX <= new_candidate)
        return 0;
      candidate = new_candidate;
    }
  candidate = next_prime (candidate);
  if (xalloc_oversized (candidate, sizeof (struct hash_entry *)))
    return 0;
  return candidate;
}

Hash_table *
hash_initialize (size_t candidate, const Hash_tuning *tuning,
                 Hash_hasher hasher, Hash_comparator comparator,
                 Hash_data_freer data_freer)
{
  Hash_table *table;

  if (hasher == NULL)
    hasher = raw_hasher;
  if (comparator == NULL)
    comparator = raw_comparator;

  table = malloc (sizeof *table);
  if (table == NULL)
    return NULL;

  if (!tuning)
    tuning = &default_tuning;
  table->tuning = tuning;
  if (!check_tuning (table))
    {
      /* Fail if the tuning options are invalid. This is the only occasion
         when the user gets some feedback about it. Once the table is created,
         if the user provides invalid tuning options, we silently revert to
         using the defaults, and ignore further request to change the tuning
         options. */
      goto fail;
    }

  table->n_buckets = compute_bucket_size (candidate, tuning);
  if (!table->n_buckets)
    goto fail;

  table->bucket = calloc (table->n_buckets, sizeof *table->bucket);
  if (table->bucket == NULL)
    goto fail;
  table->bucket_limit = table->bucket + table->n_buckets;
  table->n_buckets_used = 0;
  table->n_entries = 0;

  table->hasher = hasher;
  table->comparator = comparator;
  table->data_freer = data_freer;

  table->free_entry_list = NULL;
#if USE_OBSTACK
  obstack_init (&table->entry_stack);
#endif
  return table;

 fail:
  free (table);
  return NULL;
}

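/* Example (illustrative sketch, not from the imported source): initializing
   a table keyed by C strings, using the hash_string helper defined earlier
   in this file, strcmp (which would need <string.h>), and free as the
   data_freer for malloc'd keys. The wrapper names string_hasher and
   string_comparator are hypothetical; the wrappers exist only to match the
   Hash_hasher and Hash_comparator signatures.

       static size_t
       string_hasher (const void *entry, size_t n_buckets)
       {
         return hash_string (entry, n_buckets);
       }

       static bool
       string_comparator (const void *a, const void *b)
       {
         return strcmp (a, b) == 0;
       }

       Hash_table *ht = hash_initialize (16, NULL, string_hasher,
                                         string_comparator, free);
       if (ht == NULL)
         ...                (allocation failed or the tuning was rejected)

       hash_free (ht);      (applies the data_freer, here free, to each entry)  */
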
void
hash_clear (Hash_table *table)
{
  struct hash_entry *bucket;

  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      if (bucket->data)
        {
          struct hash_entry *cursor;
          struct hash_entry *next;

          /* Free the bucket overflow. */
          for (cursor = bucket->next; cursor; cursor = next)
            {
              if (table->data_freer)
                table->data_freer (cursor->data);
              cursor->data = NULL;

              next = cursor->next;
              /* Relinking is done one entry at a time, as it is to be expected
                 that overflows are either rare or short. */
              cursor->next = table->free_entry_list;
              table->free_entry_list = cursor;
            }

          /* Free the bucket head. */
          if (table->data_freer)
            table->data_freer (bucket->data);
          bucket->data = NULL;
          bucket->next = NULL;
        }
    }

  table->n_buckets_used = 0;
  table->n_entries = 0;
}

void
hash_free (Hash_table *table)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;

  /* Call the user data_freer function. */
  if (table->data_freer && table->n_entries)
    {
      for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
        {
          if (bucket->data)
            {
              for (cursor = bucket; cursor; cursor = cursor->next)
                table->data_freer (cursor->data);
            }
        }
    }

#if USE_OBSTACK

  obstack_free (&table->entry_stack, NULL);

#else

  /* Free all bucket overflowed entries. */
  for (bucket = table->bucket; bucket < table->bucket_limit; bucket++)
    {
      for (cursor = bucket->next; cursor; cursor = next)
        {
          next = cursor->next;
          free (cursor);
        }
    }

  /* Also reclaim the internal list of previously freed entries. */
  for (cursor = table->free_entry_list; cursor; cursor = next)
    {
      next = cursor->next;
      free (cursor);
    }

#endif

  /* Free the remainder of the hash table structure. */
  free (table->bucket);
  free (table);
}

/* Insertion and deletion. */

/* Get a new hash entry for a bucket overflow, possibly by recycling a
   previously freed one. If this is not possible, allocate a new one. */

static struct hash_entry *
allocate_entry (Hash_table *table)
{
  struct hash_entry *new;

  if (table->free_entry_list)
    {
      new = table->free_entry_list;
      table->free_entry_list = new->next;
    }
  else
    {
#if USE_OBSTACK
      new = obstack_alloc (&table->entry_stack, sizeof *new);
#else
      new = malloc (sizeof *new);
#endif
    }

  return new;
}

/* Free a hash entry which was part of some bucket overflow,
   saving it for later recycling. */

static void
free_entry (Hash_table *table, struct hash_entry *entry)
{
  entry->data = NULL;
  entry->next = table->free_entry_list;
  table->free_entry_list = entry;
}

/* This private function is used to help with insertion and deletion. When
   ENTRY matches an entry in the table, return a pointer to the corresponding
   user data and set *BUCKET_HEAD to the head of the selected bucket.
   Otherwise, return NULL. When DELETE is true and ENTRY matches an entry in
   the table, unlink the matching entry. */

static void *
hash_find_entry (Hash_table *table, const void *entry,
                 struct hash_entry **bucket_head, bool delete)
{
  struct hash_entry *bucket = safe_hasher (table, entry);
  struct hash_entry *cursor;

  *bucket_head = bucket;

  /* Test for empty bucket. */
  if (bucket->data == NULL)
    return NULL;

  /* See if the entry is the first in the bucket. */
  if (entry == bucket->data || table->comparator (entry, bucket->data))
    {
      void *data = bucket->data;

      if (delete)
        {
          if (bucket->next)
            {
              struct hash_entry *next = bucket->next;

              /* Bump the first overflow entry into the bucket head, then save
                 the previous first overflow entry for later recycling. */
              *bucket = *next;
              free_entry (table, next);
            }
          else
            {
              bucket->data = NULL;
            }
        }

      return data;
    }

  /* Scan the bucket overflow. */
  for (cursor = bucket; cursor->next; cursor = cursor->next)
    {
      if (entry == cursor->next->data
          || table->comparator (entry, cursor->next->data))
        {
          void *data = cursor->next->data;

          if (delete)
            {
              struct hash_entry *next = cursor->next;

              /* Unlink the entry to delete, then save the freed entry for later
                 recycling. */
              cursor->next = next->next;
              free_entry (table, next);
            }

          return data;
        }
    }

  /* No entry found. */
  return NULL;
}

/* Internal helper, to move entries from SRC to DST. Both tables must
   share the same free entry list. If SAFE, only move overflow
   entries, saving bucket heads for later, so that no allocations will
   occur. Return false if the free entry list is exhausted and an
   allocation fails. */

static bool
transfer_entries (Hash_table *dst, Hash_table *src, bool safe)
{
  struct hash_entry *bucket;
  struct hash_entry *cursor;
  struct hash_entry *next;
  for (bucket = src->bucket; bucket < src->bucket_limit; bucket++)
    if (bucket->data)
      {
        void *data;
        struct hash_entry *new_bucket;

        /* Within each bucket, transfer overflow entries first and
           then the bucket head, to minimize memory pressure. After
           all, the only time we might allocate is when moving the
           bucket head, but moving overflow entries first may create
           free entries that can be recycled by the time we finally
           get to the bucket head. */
        for (cursor = bucket->next; cursor; cursor = next)
          {
            data = cursor->data;
            new_bucket = safe_hasher (dst, data);

            next = cursor->next;

            if (new_bucket->data)
              {
                /* Merely relink an existing entry, when moving from a
                   bucket overflow into a bucket overflow. */
                cursor->next = new_bucket->next;
                new_bucket->next = cursor;
              }
            else
              {
                /* Free an existing entry, when moving from a bucket
                   overflow into a bucket header. */
                new_bucket->data = data;
                dst->n_buckets_used++;
                free_entry (dst, cursor);
              }
          }
        /* Now move the bucket head. Be sure that if we fail due to
           allocation failure that the src table is in a consistent
           state. */
        data = bucket->data;
        bucket->next = NULL;
        if (safe)
          continue;
        new_bucket = safe_hasher (dst, data);

        if (new_bucket->data)
          {
            /* Allocate or recycle an entry, when moving from a bucket
               header into a bucket overflow. */
            struct hash_entry *new_entry = allocate_entry (dst);

            if (new_entry == NULL)
              return false;

            new_entry->data = data;
            new_entry->next = new_bucket->next;
            new_bucket->next = new_entry;
          }
        else
          {
            /* Move from one bucket header to another. */
            new_bucket->data = data;
            dst->n_buckets_used++;
          }
        bucket->data = NULL;
        src->n_buckets_used--;
      }
  return true;
}

bool
hash_rehash (Hash_table *table, size_t candidate)
{
  Hash_table storage;
  Hash_table *new_table;
  size_t new_size = compute_bucket_size (candidate, table->tuning);

  if (!new_size)
    return false;
  if (new_size == table->n_buckets)
    return true;
  new_table = &storage;
  new_table->bucket = calloc (new_size, sizeof *new_table->bucket);
  if (new_table->bucket == NULL)
    return false;
  new_table->n_buckets = new_size;
  new_table->bucket_limit = new_table->bucket + new_size;
  new_table->n_buckets_used = 0;
  new_table->n_entries = 0;
  new_table->tuning = table->tuning;
  new_table->hasher = table->hasher;
  new_table->comparator = table->comparator;
  new_table->data_freer = table->data_freer;

  /* In order for the transfer to successfully complete, we need
     additional overflow entries when distinct buckets in the old
     table collide into a common bucket in the new table. The worst
     case possible is a hasher that gives a good spread with the old
     size, but returns a constant with the new size; if we were to
     guarantee table->n_buckets_used-1 free entries in advance, then
     the transfer would be guaranteed to not allocate memory.
     However, for large tables, a guarantee of no further allocation
     introduces a lot of extra memory pressure, all for an unlikely
     corner case (most rehashes reduce, rather than increase, the
     number of overflow entries needed). So, we instead ensure that
     the transfer process can be reversed if we hit a memory
     allocation failure mid-transfer. */

  /* Merely reuse the extra old space into the new table. */
#if USE_OBSTACK
  new_table->entry_stack = table->entry_stack;
#endif
  new_table->free_entry_list = table->free_entry_list;

  if (transfer_entries (new_table, table, false))
    {
      /* Entries transferred successfully; tie up the loose ends. */
      free (table->bucket);
      table->bucket = new_table->bucket;
      table->bucket_limit = new_table->bucket_limit;
      table->n_buckets = new_table->n_buckets;
      table->n_buckets_used = new_table->n_buckets_used;
      table->free_entry_list = new_table->free_entry_list;
      /* table->n_entries and table->entry_stack already hold their value. */
      return true;
    }

  /* We've allocated new_table->bucket (and possibly some entries),
     exhausted the free list, and moved some but not all entries into
     new_table. We must undo the partial move before returning
     failure. The only way to get into this situation is if new_table
     uses fewer buckets than the old table, so we will reclaim some
     free entries as overflows in the new table are put back into
     distinct buckets in the old table.

     There are some pathological cases where a single pass through the
     table requires more intermediate overflow entries than using two
     passes. Two passes give worse cache performance and takes
     longer, but at this point, we're already out of memory, so slow
     and safe is better than failure. */
  table->free_entry_list = new_table->free_entry_list;
  if (! (transfer_entries (table, new_table, true)
         && transfer_entries (table, new_table, false)))
    abort ();
  /* table->n_entries already holds its value. */
  free (new_table->bucket);
  return false;
}

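/* Example (illustrative sketch, not from the imported source): hash_rehash
   can also be called directly, for instance to pre-size a table before a
   large batch of insertions instead of letting the insertion path grow it
   step by step. The name ht and the count are hypothetical; with the
   default tuning (is_n_buckets false), the candidate is interpreted as an
   expected number of entries.

       if (!hash_rehash (ht, 100000))
         ...        (allocation failed; the existing entries remain intact)  */
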
int
hash_insert_if_absent (Hash_table *table, void const *entry,
                       void const **matched_ent)
{
  void *data;
  struct hash_entry *bucket;

  /* The caller cannot insert a NULL entry, since hash_lookup returns NULL
     to indicate "not found", and hash_find_entry uses "bucket->data == NULL"
     to indicate an empty bucket. */
  if (! entry)
    abort ();

  /* If there's a matching entry already in the table, return that. */
  if ((data = hash_find_entry (table, entry, &bucket, false)) != NULL)
    {
      if (matched_ent)
        *matched_ent = data;
      return 0;
    }

  /* If the growth threshold of the buckets in use has been reached, increase
     the table size and rehash. There's no point in checking the number of
     entries: if the hashing function is ill-conditioned, rehashing is not
     likely to improve it. */

  if (table->n_buckets_used
      > table->tuning->growth_threshold * table->n_buckets)
    {
      /* Check more fully, before starting real work. If tuning arguments
         became invalid, the second check will rely on proper defaults. */
      check_tuning (table);
      if (table->n_buckets_used
          > table->tuning->growth_threshold * table->n_buckets)
        {
          const Hash_tuning *tuning = table->tuning;
          float candidate =
            (tuning->is_n_buckets
             ? (table->n_buckets * tuning->growth_factor)
             : (table->n_buckets * tuning->growth_factor
                * tuning->growth_threshold));

          if ((float) SIZE_MAX <= candidate)
            return -1;

          /* If the rehash fails, arrange to return NULL. */
          if (!hash_rehash (table, candidate))
            return -1;

          /* Update the bucket we are interested in. */
          if (hash_find_entry (table, entry, &bucket, false) != NULL)
            abort ();
        }
    }

  /* ENTRY is not matched, it should be inserted. */

  if (bucket->data)
    {
      struct hash_entry *new_entry = allocate_entry (table);

      if (new_entry == NULL)
        return -1;

      /* Add ENTRY in the overflow of the bucket. */

      new_entry->data = (void *) entry;
      new_entry->next = bucket->next;
      bucket->next = new_entry;
      table->n_entries++;
      return 1;
    }

  /* Add ENTRY right in the bucket head. */

  bucket->data = (void *) entry;
  table->n_entries++;
  table->n_buckets_used++;

  return 1;
}

void *
hash_insert (Hash_table *table, void const *entry)
{
  void const *matched_ent;
  int err = hash_insert_if_absent (table, entry, &matched_ent);
  return (err == -1
          ? NULL
          : (void *) (err == 0 ? matched_ent : entry));
}

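/* Example (illustrative sketch, not from the imported source): the
   return-value protocol of the insertion functions above, for a hypothetical
   table ht and entry key.

       void const *matched;
       int r = hash_insert_if_absent (ht, key, &matched);
       if (r == -1)
         ...        (allocation or growth failed; nothing was inserted)
       else if (r == 0)
         ...        (an equal entry was already present; it is now in matched)
       else
         ...        (r == 1: key itself was inserted)

   The hash_insert wrapper folds the three cases into a single pointer: NULL
   on failure, otherwise the entry that ends up in the table (the pre-existing
   match when there was one, or key itself).  */
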
void *
hash_remove (Hash_table *table, const void *entry)
{
  void *data;
  struct hash_entry *bucket;

  data = hash_find_entry (table, entry, &bucket, true);
  if (!data)
    return NULL;

  table->n_entries--;
  if (!bucket->data)
    {
      table->n_buckets_used--;

      /* If the shrink threshold of the buckets in use has been reached,
         rehash into a smaller table. */

      if (table->n_buckets_used
          < table->tuning->shrink_threshold * table->n_buckets)
        {
          /* Check more fully, before starting real work. If tuning arguments
             became invalid, the second check will rely on proper defaults. */
          check_tuning (table);
          if (table->n_buckets_used
              < table->tuning->shrink_threshold * table->n_buckets)
            {
              const Hash_tuning *tuning = table->tuning;
              size_t candidate =
                (tuning->is_n_buckets
                 ? table->n_buckets * tuning->shrink_factor
                 : (table->n_buckets * tuning->shrink_factor
                    * tuning->growth_threshold));

              if (!hash_rehash (table, candidate))
                {
                  /* Failure to allocate memory in an attempt to
                     shrink the table is not fatal. But since memory
                     is low, we can at least be kind and free any
                     spare entries, rather than keeping them tied up
                     in the free entry list. */
#if ! USE_OBSTACK
                  struct hash_entry *cursor = table->free_entry_list;
                  struct hash_entry *next;
                  while (cursor)
                    {
                      next = cursor->next;
                      free (cursor);
                      cursor = next;
                    }
                  table->free_entry_list = NULL;
#endif
                }
            }
        }
    }

  return data;
}

void *
hash_delete (Hash_table *table, const void *entry)
{
  return hash_remove (table, entry);
}

/* Testing. */

#if TESTING

void
hash_print (const Hash_table *table)
{
  struct hash_entry *bucket = (struct hash_entry *) table->bucket;

  for ( ; bucket < table->bucket_limit; bucket++)
    {
      struct hash_entry *cursor;

      if (bucket)
        printf ("%lu:\n", (unsigned long int) (bucket - table->bucket));

      for (cursor = bucket; cursor; cursor = cursor->next)
        {
          char const *s = cursor->data;
          /* FIXME */
          if (s)
            printf (" %s\n", s);
        }
    }
}

#endif /* TESTING */