VirtualBox

source: kBuild/trunk/src/kmk/strcache2.c@ 1869

Last change on this file since 1869 was 1869, checked in by bird, 16 years ago

kmk: untested version of strcache2, lacking rehash. This is a string allocation cache similar to the one found in strcache, except that all the hashing and hashtable code is inlined and tuned. It further allows the user to have as many instances as he wishes as well as associating a value (pointer) with each entry.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 15.8 KB
Line 
1/* $Id: strcache2.c 1869 2008-10-16 05:05:04Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <[email protected]>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef WINDOWS32
39# include <io.h>
40# include <process.h>
41# include <Windows.h>
42# define PARSE_IN_WORKER
43#endif
44
45#ifdef __OS2__
46# include <sys/fmutex.h>
47#endif
48
49#ifdef HAVE_PTHREAD
50# include <pthread.h>
51#endif
52
53
54/*******************************************************************************
55* Defined Constants And Macros *
56*******************************************************************************/
57/* The default size of a memory segment (1MB). */
58#define STRCACHE2_SEG_SIZE (1024U*1024U)
59/* The initial hash table size (number of entries). Power of two. */
60#define STRCACHE2_INITIAL_SIZE 0x10000U
61
62
63/*******************************************************************************
64* Global Variables *
65*******************************************************************************/
66/* the deleted filler hash table entry. */
67static struct strcache2_entry deleted_entry;
68
69
70static struct strcache2_seg *
71strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
72{
73 struct strcache2_seg *seg;
74 size_t size;
75
76 size = STRCACHE2_SEG_SIZE;
77 if (size < (size_t)minlen + sizeof (struct strcache2_seg))
78 {
79 size = (size_t)minlen * 2;
80 size = (size + 0xfff) & ~(size_t)0xfff;
81 }
82
83 seg = xmalloc (size);
84 seg->cursor = seg->start = (char *)(seg + 1);
85 seg->size = seg->avail = size - sizeof (struct strcache2_seg);
86 assert (seg->size > minlen);
87
88 seg->next = cache->seg_head;
89 cache->seg_head = seg;
90
91 return seg;
92}
93
94MY_INLINE unsigned int
95strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
96{
97 unsigned int hash = 0;
98 if (MY_PREDICT_TRUE(len >= 2))
99 {
100 unsigned int ch0 = *str++;
101 hash = len--;
102 while (len >= 2)
103 {
104 unsigned int ch1 = *str++;
105 hash += ch0 << (ch1 & 0xf);
106
107 ch0 = *str++;
108 hash += ch1 << (ch0 & 0xf);
109
110 len -= 2;
111 }
112 if (len == 1)
113 {
114 unsigned ch1 = *str;
115 hash += ch0 << (ch1 & 0xf);
116
117 hash += ch1;
118 }
119 else
120 hash += ch0;
121 }
122 else if (len)
123 {
124 hash = *str;
125 hash += hash << (hash & 0xf);
126 }
127 else
128 hash = 0;
129 return hash;
130}
131
132MY_INLINE unsigned int
133strcache2_case_sensitive_hash_2 (const char *str, unsigned int len)
134{
135 unsigned int hash = 0;
136 if (MY_PREDICT_TRUE(len >= 2))
137 {
138 unsigned int ch0 = *str++;
139 hash = len--;
140 while (len >= 2)
141 {
142 unsigned int ch1 = *str++;
143 hash += ch0 << (ch1 & 0x7);
144
145 ch0 = *str++;
146 hash += ch1 << (ch0 & 0x7);
147
148 len -= 2;
149 }
150 if (len == 1)
151 {
152 unsigned ch1 = *str;
153 hash += ch0 << (ch1 & 0x7);
154
155 hash += ch1;
156 }
157 else
158 hash += ch0;
159 }
160 else if (len)
161 {
162 hash = *str;
163 hash += hash << (hash & 0x7);
164 }
165 else
166 hash = 0;
167 return hash | 1;
168}
169
170MY_INLINE unsigned int
171strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
172{
173 unsigned int hash = 0;
174 if (MY_PREDICT_TRUE(len >= 2))
175 {
176 unsigned int ch0 = *str++;
177 ch0 = tolower (ch0);
178 hash = len--;
179 while (len >= 2)
180 {
181 unsigned int ch1 = *str++;
182 ch1 = tolower (ch1);
183 hash += ch0 << (ch1 & 0xf);
184
185 ch0 = *str++;
186 ch0 = tolower (ch0);
187 hash += ch1 << (ch0 & 0xf);
188
189 len -= 2;
190 }
191 if (len == 1)
192 {
193 unsigned ch1 = *str;
194 ch1 = tolower (ch1);
195 hash += ch0 << (ch1 & 0xf);
196
197 hash += ch1;
198 }
199 else
200 hash += ch0;
201 }
202 else if (len)
203 {
204 hash = *str;
205 hash += hash << (hash & 0xf);
206 }
207 else
208 hash = 0;
209 return hash;
210}
211
212MY_INLINE unsigned int
213strcache2_case_insensitive_hash_2 (const char *str, unsigned int len)
214{
215 unsigned int hash = 0;
216 if (MY_PREDICT_TRUE(len >= 2))
217 {
218 unsigned int ch0 = *str++;
219 ch0 = tolower (ch0);
220 hash = len--;
221 while (len >= 2)
222 {
223 unsigned int ch1 = *str++;
224 ch1 = tolower (ch1);
225 hash += ch0 << (ch1 & 0x7);
226
227 ch0 = *str++;
228 ch0 = tolower (ch0);
229 hash += ch1 << (ch0 & 0x7);
230
231 len -= 2;
232 }
233 if (len == 1)
234 {
235 unsigned ch1 = *str;
236 ch1 = tolower (ch1);
237 hash += ch0 << (ch1 & 0x7);
238
239 hash += ch1;
240 }
241 else
242 hash += ch0;
243 }
244 else if (len)
245 {
246 hash = *str;
247 hash += hash << (hash & 0x7);
248 }
249 else
250 hash = 0;
251 return hash;
252}
253
254MY_INLINE int
255strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
256 const char *str, unsigned int length, unsigned int hash1)
257{
258 /* the simple stuff first. */
259 if ( entry == NULL
260 || entry == &deleted_entry
261 || entry->hash1 != hash1
262 || entry->length != length)
263 return 0;
264
265 return !cache->case_insensitive
266 ? memcmp (cache + 1, str, length) == 0
267#if defined(_MSC_VER) || defined(__OS2__)
268 : _memicmp (cache + 1, str, length) == 0
269#else
270 : strncasecmp ((const char *)(cache + 1), str, length) == 0
271#endif
272 ;
273}
274
275static void
276strcache2_rehash (struct strcache2 *cache)
277{
278 /* TODO */
279 struct strcache2 **ptr = (struct strcache2 **)1;
280 assert(0);
281 *ptr = cache;
282}
283
284/* Internal worker that enters a new string into the cache. */
285static const char *
286strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
287 const char *str, unsigned int length,
288 unsigned int hash1, unsigned hash2)
289{
290 struct strcache2_entry *entry;
291 struct strcache2_seg *seg;
292 unsigned int size;
293 char *str_copy;
294
295 /* Allocate space for the string. */
296
297 size = length + 1 + sizeof (struct strcache2_entry);
298 size = (size + sizeof (void *) - 1) & ~(sizeof (void *) - 1U);
299
300 seg = cache->seg_head;
301 if (MY_PREDICT_FALSE(seg->avail < size))
302 {
303 do
304 seg = seg->next;
305 while (seg && seg->avail < size);
306 if (!seg)
307 seg = strcache2_new_seg (cache, size);
308 }
309
310 entry = (struct strcache2_entry *) seg->cursor;
311 seg->cursor += size;
312 seg->avail -= size;
313
314 /* Setup the entry, copy the string and insert it into the hash table. */
315
316 entry->user = NULL;
317 entry->length = length;
318 entry->hash1 = hash1;
319 entry->hash2 = hash2;
320 str_copy = (char *) memcpy (entry + 1, str, length);
321 str_copy[length] = '\0';
322
323 cache->hash_tab[idx] = entry;
324 cache->count++;
325 if (cache->count >= cache->rehash_count)
326 strcache2_rehash (cache);
327
328 return str_copy;
329}
330
331/* The public add/lookup string interface. */
332const char *
333strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
334{
335 struct strcache2_entry const *entry;
336 unsigned int hash2;
337 unsigned int hash1 = cache->case_insensitive
338 ? strcache2_case_insensitive_hash_1 (str, length)
339 : strcache2_case_sensitive_hash_1 (str, length);
340 unsigned int idx;
341
342 cache->lookup_count++;
343
344 /* Lookup the entry in the hash table, hoping for an
345 early match. */
346
347 idx = hash1 / cache->hash_size;
348 entry = cache->hash_tab[idx];
349 if (strcache2_is_equal (cache, entry, str, length, hash1))
350 return (const char *)(entry + 1);
351 if (!entry)
352 hash2 = 0;
353 else
354 {
355 unsigned int deleted_idx = entry == &deleted_entry ? idx : ~0U;
356 cache->collision_count++;
357
358 hash2 = cache->case_insensitive
359 ? strcache2_case_insensitive_hash_2 (str, length)
360 : strcache2_case_sensitive_hash_2 (str, length);
361 idx += hash2;
362 idx /= cache->hash_size;
363 entry = cache->hash_tab[idx];
364 if (strcache2_is_equal (cache, entry, str, length, hash1))
365 return (const char *)(entry + 1);
366
367 if (entry)
368 {
369 do
370 {
371 if (deleted_idx == ~0U && entry == &deleted_entry)
372 deleted_idx = idx;
373
374 idx += hash2;
375 idx /= cache->hash_size;
376 entry = cache->hash_tab[idx];
377 if (strcache2_is_equal (cache, entry, str, length, hash1))
378 return (const char *)(entry + 1);
379 }
380 while (entry);
381 }
382 if (deleted_idx != ~0U)
383 idx = deleted_idx;
384 }
385
386 /* Not found, add it at IDX. */
387 return strcache2_enter_string (cache, idx, str, length, hash1, hash2);
388}
389
390/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
391int
392strcache2_is_cached (struct strcache2 *cache, const char *str)
393{
394 /* Check mandatory alignment first. */
395
396 if (!((size_t)str & (sizeof (void *) - 1)))
397 {
398 /* Check the segment list and consider the question answered if the
399 string is within one of them. (Could check it more thoroughly...) */
400
401 struct strcache2_seg const *seg;
402 for (seg = cache->seg_head; seg; seg++)
403 if ((size_t)(str - seg->start) < seg->size)
404 return 1;
405 }
406
407 return 0;
408}
409
410
411/* Verify the integrity of the specified string, returning 0 if OK. */
412int
413strcache2_verify_entry (struct strcache2 *cache, const char *str)
414{
415 struct strcache2_entry const *entry;
416 unsigned hash;
417 const char *end;
418
419 if ((size_t)str & (sizeof (void *) - 1))
420 {
421 fprintf (stderr,
422 "strcache2[%s]: missaligned string %p\nstring: %s\n",
423 cache->name, (void *)str, str);
424 return -1;
425 }
426
427 entry = (struct strcache2_entry const *)str - 1;
428 end = memchr (str, '\0', entry->length);
429 if ((size_t)(end - str) == entry->length)
430 {
431 fprintf (stderr,
432 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
433 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
434 return -1;
435 }
436
437 hash = cache->case_insensitive
438 ? strcache2_case_insensitive_hash_1 (str, entry->length)
439 : strcache2_case_sensitive_hash_1 (str, entry->length);
440 if (hash != entry->hash1)
441 {
442 fprintf (stderr,
443 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
444 cache->name, (void *)entry, hash, entry->hash1, str);
445 return -1;
446 }
447
448 if (entry->hash2)
449 {
450 hash = cache->case_insensitive
451 ? strcache2_case_insensitive_hash_2 (str, entry->length)
452 : strcache2_case_sensitive_hash_2 (str, entry->length);
453 if (hash != entry->hash1)
454 {
455 fprintf (stderr,
456 "strcache2[%s]: corrupt entry %p, hash#2: %x, expected %x;\nstring: %s\n",
457 cache->name, (void *)entry, hash, entry->hash2, str);
458 return -1;
459 }
460 }
461
462 return 0;
463}
464
465/* Fallback for calculating and returning the 2nd hash. */
466unsigned int
467strcache2_get_hash2_fallback (struct strcache2 *cache, const char *str)
468{
469 struct strcache2_entry *entry = (struct strcache2_entry *) str - 1;
470 unsigned hash2 = cache->case_insensitive
471 ? strcache2_case_insensitive_hash_2 (str, entry->length)
472 : strcache2_case_sensitive_hash_2 (str, entry->length);
473 entry->hash2 = hash2;
474 return hash2;
475}
476
477/* List of initialized string caches. */
478static struct strcache2 *strcache_head;
479
480/* Initalizes a new cache. */
481void
482strcache2_init (struct strcache2 *cache, const char *name,
483 int case_insensitive, int thread_safe)
484{
485 assert (!thread_safe);
486
487 cache->case_insensitive = case_insensitive;
488 cache->hash_size = STRCACHE2_INITIAL_SIZE;
489 cache->count = 0;
490 cache->rehash_count = STRCACHE2_INITIAL_SIZE / 4 * 3;
491 cache->collision_count = 0;
492 cache->lookup_count = 0;
493 cache->lock = NULL;
494 cache->seg_head = NULL;
495 cache->name = name;
496
497 cache->hash_tab = (struct strcache2_entry **)
498 xmalloc (STRCACHE2_INITIAL_SIZE * sizeof (struct strcache2_entry *));
499 memset (cache->hash_tab, '\0', STRCACHE2_INITIAL_SIZE * sizeof (struct strcache2_entry *));
500 strcache2_new_seg (cache, 0);
501
502 /* link it */
503 cache->next = strcache_head;
504 strcache_head = cache;
505}
506
507
508/* Terminates a string cache, freeing all memory and other
509 associated resources. */
510void
511strcache2_term (struct strcache2 *cache)
512{
513 /* unlink it */
514
515 if (strcache_head == cache)
516 strcache_head = cache->next;
517 else
518 {
519 struct strcache2 *prev = strcache_head;
520 while (prev->next != cache)
521 prev = prev->next;
522 assert (prev);
523 prev->next = cache->next;
524 }
525
526 /* free the memory segments */
527
528 do
529 {
530 void *free_it = cache->seg_head;
531 cache->seg_head = cache->seg_head->next;
532 free (free_it);
533 }
534 while (cache->seg_head);
535
536 /* free the hash and clear the structure. */
537
538 free (cache->hash_tab);
539 memset (cache, '\0', sizeof (struct strcache2));
540}
541
542
543void
544strcache2_print_stats (struct strcache2 *cache, const char *prefix)
545{
546 unsigned int seg_count = 0;
547 unsigned long seg_total_bytes = 0;
548 unsigned long seg_avg_bytes;
549 unsigned long seg_avail_bytes = 0;
550 unsigned long seg_max_bytes = 0;
551 struct strcache2_seg *seg;
552 unsigned long str_total_len = 0;
553 unsigned int str_avg_len;
554 unsigned int str_min_len = ~0U;
555 unsigned int str_max_len = 0;
556 unsigned int idx;
557 unsigned int rehashes;
558
559 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
560
561 /* Segment statistics. */
562
563 for (seg = cache->seg_head; seg; seg = seg->next)
564 {
565 seg_count++;
566 seg_total_bytes += seg->size;
567 seg_avail_bytes += seg->avail;
568 if (seg->size > seg_max_bytes)
569 seg_max_bytes = seg->size;
570 }
571 seg_avg_bytes = seg_total_bytes / seg_count;
572 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
573 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
574 STRCACHE2_SEG_SIZE, seg_avail_bytes);
575
576 /* String statistics. */
577
578 idx = cache->hash_size;
579 while (idx-- > 0)
580 {
581 struct strcache2_entry const *entry = cache->hash_tab[idx];
582 if (entry && entry != &deleted_entry)
583 {
584 unsigned int length = entry->length;
585 str_total_len += length;
586 if (length > str_max_len)
587 str_max_len = length;
588 if (length < str_min_len)
589 str_min_len = length;
590 }
591 }
592 str_avg_len = cache->count ? str_total_len / cache->count : 0;
593 printf (_("%s %u strings: total len = %lu / max = %ul / avg = %ul / min = %ul\n"),
594 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
595
596 /* Hash statistics. */
597
598 idx = STRCACHE2_INITIAL_SIZE;
599 rehashes = 0;
600 while (idx < cache->hash_size)
601 {
602 idx *= 2;
603 rehashes++;
604 }
605
606 printf (_("%s hash size = %u lookups / collisions = %lu / %lu (%u%%) %u rehashes\n"),
607 prefix, cache->hash_size, cache->lookup_count, cache->collision_count,
608 (unsigned int)((float)cache->collision_count / cache->lookup_count),
609 rehashes);
610
611}
612
613/* Print statistics for all string caches. */
614void
615strcache2_print_stats_all (const char *prefix)
616{
617 struct strcache2 *cur;
618 for (cur = strcache_head; cur; cur++)
619 strcache2_print_stats (cur, prefix);
620}
621
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette