VirtualBox

source: kBuild/branches/kBuild-0.1.5/src/kmk/strcache2.c@ 2368

Last change on this file since 2368 was 2243, checked in by bird, 16 years ago

*: Updated copyright to 2009 and normalized name & email.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 38.5 KB
Line 
1/* $Id: strcache2.c 2243 2009-01-10 02:24:02Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008-2009 knut st. osmundsen <[email protected]>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 3 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild. If not, see <http://www.gnu.org/licenses/>
23 *
24 */
25
26/*******************************************************************************
27* Header Files *
28*******************************************************************************/
29#include "make.h"
30#include "strcache2.h"
31
32#include <assert.h>
33
34#include "debug.h"
35
36#ifdef _MSC_VER
37typedef unsigned char uint8_t;
38typedef unsigned short uint16_t;
39typedef unsigned int uint32_t;
40typedef signed char int8_t;
41typedef signed short int16_t;
42typedef signed int int32_t;
43#else
44# include <stdint.h>
45#endif
46
47#ifdef WINDOWS32
48# include <io.h>
49# include <process.h>
50# include <Windows.h>
51# define PARSE_IN_WORKER
52#endif
53
54#ifdef __OS2__
55# include <sys/fmutex.h>
56#endif
57
58#ifdef HAVE_PTHREAD
59# include <pthread.h>
60#endif
61
62
63/*******************************************************************************
64* Defined Constants And Macros *
65*******************************************************************************/
66/* The default size of a memory segment (1MB). */
67#define STRCACHE2_SEG_SIZE (1024U*1024U)
68/* The default hash table shift (hash size give as a power of two). */
69#define STRCACHE2_HASH_SHIFT 16
70/** Does the modding / masking of a hash number into an index. */
71#ifdef STRCACHE2_USE_MASK
72# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
73#else
74# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
75#endif
76
77# if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
78 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
79# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
80# else
81 /* (endian doesn't matter) */
82# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
83 | (((const uint8_t *)(ptr))[1]) )
84# endif
85
86
87/*******************************************************************************
88* Global Variables *
89*******************************************************************************/
90/* List of initialized string caches. */
91static struct strcache2 *strcache_head;
92
93
94/** Finds the closest primary number for power of two value (or something else
95 * useful if not support). */
96MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
97{
98 switch (shift)
99 {
100 case 5: return 31;
101 case 6: return 61;
102 case 7: return 127;
103 case 8: return 251;
104 case 9: return 509;
105 case 10: return 1021;
106 case 11: return 2039;
107 case 12: return 4093;
108 case 13: return 8191;
109 case 14: return 16381;
110 case 15: return 32749;
111 case 16: return 65521;
112 case 17: return 131063;
113 case 18: return 262139;
114 case 19: return 524269;
115 case 20: return 1048573;
116 case 21: return 2097143;
117 case 22: return 4194301;
118 case 23: return 8388593;
119
120 default:
121 assert (0);
122 return (1 << shift) - 1;
123 }
124}
125
126/* The following is a bit experiment. It produces longer chains, i.e. worse
127 distribution of the strings in the table, however the actual make
128 performances is better (<time). The explanation is probably that the
129 collisions only really increase for entries that aren't looked up that
130 much and that it actually improoves the situation for those that is. Or
131 that we spend so much less time hashing that it makes up (and more) for
132 the pentalty we suffer from the longer chains and worse distribution.
133
134 XXX: Check how this works out with different path lengths. I suspect it
135 might depend on the length of PATH_ROOT and the depth of the files
136 in the project as well. If it does, this might make matters worse
137 for some and better for others which isn't very cool... */
138
139#if 0
140# define BIG_HASH_SIZE 32 /* kinda fast */
141# define BIG_HASH_HEAD 16
142# define BIG_HASH_TAIL 12
143#elif 0
144# define BIG_HASH_SIZE 68 /* kinda safe */
145# define BIG_HASH_HEAD 24
146# define BIG_HASH_TAIL 24
147#elif 0
148# define BIG_HASH_SIZE 128 /* safe */
149# define BIG_HASH_HEAD 32
150# define BIG_HASH_TAIL 32
151#endif
152
153#ifdef BIG_HASH_SIZE
154/* long string: hash head and tail, drop the middle. */
155MY_INLINE unsigned int
156strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
157{
158 uint32_t hash = len;
159 uint32_t tmp;
160 unsigned int head;
161
162 /* head BIG_HASH_HEAD bytes */
163 head = (BIG_HASH_HEAD >> 2);
164 while (head-- > 0)
165 {
166 hash += strcache2_get_unaligned_16bits (str);
167 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
168 hash = (hash << 16) ^ tmp;
169 str += 2 * sizeof (uint16_t);
170 hash += hash >> 11;
171 }
172
173 /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
174 str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
175 head = (BIG_HASH_TAIL >> 2);
176 while (head-- > 0)
177 {
178 hash += strcache2_get_unaligned_16bits (str);
179 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
180 hash = (hash << 16) ^ tmp;
181 str += 2 * sizeof (uint16_t);
182 hash += hash >> 11;
183 }
184
185 /* force "avalanching" of final 127 bits. */
186 hash ^= hash << 3;
187 hash += hash >> 5;
188 hash ^= hash << 4;
189 hash += hash >> 17;
190 hash ^= hash << 25;
191 hash += hash >> 6;
192
193 return hash;
194}
195#endif /* BIG_HASH_SIZE */
196
197MY_INLINE unsigned int
198strcache2_case_sensitive_hash (const char *str, unsigned int len)
199{
200#if 1
201 /* Paul Hsieh hash SuperFast function:
202 http://www.azillionmonkeys.com/qed/hash.html
203
204 This performs very good and as a sligtly better distribution than
205 STRING_N_HASH_1 on a typical kBuild run.
206
207 It is also 37% faster than return_STRING_N_HASH_1 when running the
208 two 100 times over typical kBuild strings that end up here (did a
209 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
210 with -O2.
211
212 FIXME: A path for well aligned data should be added to speed up
213 execution on alignment sensitive systems. */
214 unsigned int rem;
215 uint32_t hash;
216 uint32_t tmp;
217
218 assert (sizeof (uint8_t) == sizeof (char));
219
220# ifdef BIG_HASH_SIZE
221 /* long string? */
222# if 0 /*BIG_HASH_SIZE > 128*/
223 if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
224# else
225 if (len >= BIG_HASH_SIZE)
226# endif
227 return strcache2_case_sensitive_hash_big (str, len);
228# endif
229
230 /* short string: main loop, walking on 2 x uint16_t */
231 hash = len;
232 rem = len & 3;
233 len >>= 2;
234 while (len > 0)
235 {
236 hash += strcache2_get_unaligned_16bits (str);
237 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
238 hash = (hash << 16) ^ tmp;
239 str += 2 * sizeof (uint16_t);
240 hash += hash >> 11;
241 len--;
242 }
243
244 /* the remainder */
245 switch (rem)
246 {
247 case 3:
248 hash += strcache2_get_unaligned_16bits (str);
249 hash ^= hash << 16;
250 hash ^= str[sizeof (uint16_t)] << 18;
251 hash += hash >> 11;
252 break;
253 case 2:
254 hash += strcache2_get_unaligned_16bits (str);
255 hash ^= hash << 11;
256 hash += hash >> 17;
257 break;
258 case 1:
259 hash += *str;
260 hash ^= hash << 10;
261 hash += hash >> 1;
262 break;
263 }
264
265 /* force "avalanching" of final 127 bits. */
266 hash ^= hash << 3;
267 hash += hash >> 5;
268 hash ^= hash << 4;
269 hash += hash >> 17;
270 hash ^= hash << 25;
271 hash += hash >> 6;
272
273 return hash;
274
275#elif 1
276 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
277 when running the two 100 times over typical kBuild strings that
278 end up here (did a fprintf here and built kBuild).
279 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
280
281 unsigned int hash = 0;
282 if (MY_PREDICT_TRUE(len >= 2))
283 {
284 unsigned int ch0 = *str++;
285 hash = 0;
286 len--;
287 while (len >= 2)
288 {
289 unsigned int ch1 = *str++;
290 hash += ch0 << (ch1 & 0xf);
291
292 ch0 = *str++;
293 hash += ch1 << (ch0 & 0xf);
294
295 len -= 2;
296 }
297 if (len == 1)
298 {
299 unsigned ch1 = *str;
300 hash += ch0 << (ch1 & 0xf);
301
302 hash += ch1;
303 }
304 else
305 hash += ch0;
306 }
307 else if (len)
308 {
309 hash = *str;
310 hash += hash << (hash & 0xf);
311 }
312 else
313 hash = 0;
314 return hash;
315
316#elif 1
317# if 0
318 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
319 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
320 first in the assignment made it noticably slower.)
321
322 However, it is noticably slower in practice, most likely because of more
323 collisions. Hrmpf. */
324
325# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
326 unsigned int hash = 0;
327
328# else
329 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
330 fast than return_STRING_N_HASH_1.
331
332 Ditto. */
333
334# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
335 unsigned int hash = 5381;
336# endif
337
338
339 while (len >= 4)
340 {
341 UPDATE_HASH (str[0]);
342 UPDATE_HASH (str[1]);
343 UPDATE_HASH (str[2]);
344 UPDATE_HASH (str[3]);
345 str += 4;
346 len -= 4;
347 }
348 switch (len)
349 {
350 default:
351 case 0:
352 return hash;
353 case 1:
354 UPDATE_HASH (str[0]);
355 return hash;
356 case 2:
357 UPDATE_HASH (str[0]);
358 UPDATE_HASH (str[1]);
359 return hash;
360 case 3:
361 UPDATE_HASH (str[0]);
362 UPDATE_HASH (str[1]);
363 UPDATE_HASH (str[2]);
364 return hash;
365 }
366#endif
367}
368
369MY_INLINE unsigned int
370strcache2_case_insensitive_hash (const char *str, unsigned int len)
371{
372 unsigned int hash = 0;
373 if (MY_PREDICT_TRUE(len >= 2))
374 {
375 unsigned int ch0 = *str++;
376 ch0 = tolower (ch0);
377 hash = 0;
378 len--;
379 while (len >= 2)
380 {
381 unsigned int ch1 = *str++;
382 ch1 = tolower (ch1);
383 hash += ch0 << (ch1 & 0xf);
384
385 ch0 = *str++;
386 ch0 = tolower (ch0);
387 hash += ch1 << (ch0 & 0xf);
388
389 len -= 2;
390 }
391 if (len == 1)
392 {
393 unsigned ch1 = *str;
394 ch1 = tolower (ch1);
395 hash += ch0 << (ch1 & 0xf);
396
397 hash += ch1;
398 }
399 else
400 hash += ch0;
401 }
402 else if (len)
403 {
404 hash = *str;
405 hash += hash << (hash & 0xf);
406 }
407 else
408 hash = 0;
409 return hash;
410}
411
412MY_INLINE int
413strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
414{
415 if (length <= 8)
416 {
417 /* short string compare - ~50% of the kBuild calls. */
418 assert ( !((size_t)ys & 3) );
419 if (!((size_t)xs & 3))
420 {
421 /* aligned */
422 int result;
423 switch (length)
424 {
425 default: /* memcmp for longer strings */
426 return memcmp (xs, ys, length);
427 case 8:
428 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
429 result |= *(int32_t*)xs - *(int32_t*)ys;
430 return result;
431 case 7:
432 result = xs[6] - ys[6];
433 result |= xs[5] - ys[5];
434 result |= xs[4] - ys[4];
435 result |= *(int32_t*)xs - *(int32_t*)ys;
436 return result;
437 case 6:
438 result = xs[5] - ys[5];
439 result |= xs[4] - ys[4];
440 result |= *(int32_t*)xs - *(int32_t*)ys;
441 return result;
442 case 5:
443 result = xs[4] - ys[4];
444 result |= *(int32_t*)xs - *(int32_t*)ys;
445 return result;
446 case 4:
447 return *(int32_t*)xs - *(int32_t*)ys;
448 case 3:
449 result = xs[2] - ys[2];
450 result |= xs[1] - ys[1];
451 result |= xs[0] - ys[0];
452 return result;
453 case 2:
454 result = xs[1] - ys[1];
455 result |= xs[0] - ys[0];
456 return result;
457 case 1:
458 return *xs - *ys;
459 case 0:
460 return 0;
461 }
462 }
463 else
464 {
465 /* unaligned */
466 int result = 0;
467 switch (length)
468 {
469 case 8: result |= xs[7] - ys[7];
470 case 7: result |= xs[6] - ys[6];
471 case 6: result |= xs[5] - ys[5];
472 case 5: result |= xs[4] - ys[4];
473 case 4: result |= xs[3] - ys[3];
474 case 3: result |= xs[2] - ys[2];
475 case 2: result |= xs[1] - ys[1];
476 case 1: result |= xs[0] - ys[0];
477 case 0:
478 return result;
479 }
480 }
481 }
482
483 /* memcmp for longer strings */
484 return memcmp (xs, ys, length);
485}
486
487MY_INLINE int
488strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
489{
490#ifndef ELECTRIC_HEAP
491 assert ( !((size_t)ys & 3) );
492#endif
493 if (!((size_t)xs & 3))
494 {
495 int result;
496 /* aligned */
497 while (length >= 8)
498 {
499 result = *(int32_t*)xs - *(int32_t*)ys;
500 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
501 if (MY_PREDICT_FALSE(result))
502 return result;
503 xs += 8;
504 ys += 8;
505 length -= 8;
506 }
507 switch (length)
508 {
509 case 7:
510 result = *(int32_t*)xs - *(int32_t*)ys;
511 result |= xs[6] - ys[6];
512 result |= xs[5] - ys[5];
513 result |= xs[4] - ys[4];
514 return result;
515 case 6:
516 result = *(int32_t*)xs - *(int32_t*)ys;
517 result |= xs[5] - ys[5];
518 result |= xs[4] - ys[4];
519 return result;
520 case 5:
521 result = *(int32_t*)xs - *(int32_t*)ys;
522 result |= xs[4] - ys[4];
523 return result;
524 case 4:
525 return *(int32_t*)xs - *(int32_t*)ys;
526 case 3:
527 result = xs[2] - ys[2];
528 result |= xs[1] - ys[1];
529 result |= xs[0] - ys[0];
530 return result;
531 case 2:
532 result = xs[1] - ys[1];
533 result |= xs[0] - ys[0];
534 return result;
535 case 1:
536 return *xs - *ys;
537 default:
538 case 0:
539 return 0;
540 }
541 }
542 else
543 {
544 /* unaligned */
545 int result;
546 while (length >= 8)
547 {
548#if defined(__i386__) || defined(__x86_64__)
549 result = ( ((int32_t)xs[3] << 24)
550 | ((int32_t)xs[2] << 16)
551 | ((int32_t)xs[1] << 8)
552 | xs[0] )
553 - *(int32_t*)ys;
554 result |= ( ((int32_t)xs[7] << 24)
555 | ((int32_t)xs[6] << 16)
556 | ((int32_t)xs[5] << 8)
557 | xs[4] )
558 - *(int32_t*)(ys + 4);
559#else
560 result = xs[3] - ys[3];
561 result |= xs[2] - ys[2];
562 result |= xs[1] - ys[1];
563 result |= xs[0] - ys[0];
564 result |= xs[7] - ys[7];
565 result |= xs[6] - ys[6];
566 result |= xs[5] - ys[5];
567 result |= xs[4] - ys[4];
568#endif
569 if (MY_PREDICT_FALSE(result))
570 return result;
571 xs += 8;
572 ys += 8;
573 length -= 8;
574 }
575 result = 0;
576 switch (length)
577 {
578 case 7: result |= xs[6] - ys[6];
579 case 6: result |= xs[5] - ys[5];
580 case 5: result |= xs[4] - ys[4];
581 case 4: result |= xs[3] - ys[3];
582 case 3: result |= xs[2] - ys[2];
583 case 2: result |= xs[1] - ys[1];
584 case 1: result |= xs[0] - ys[0];
585 return result;
586 default:
587 case 0:
588 return 0;
589 }
590 }
591}
592
593MY_INLINE int
594strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
595 const char *str, unsigned int length, unsigned int hash)
596{
597 assert (!cache->case_insensitive);
598
599 /* the simple stuff first. */
600 if ( entry->hash != hash
601 || entry->length != length)
602 return 0;
603
604#if 0
605 return memcmp (str, entry + 1, length) == 0;
606#elif 1
607 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
608#else
609 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
610#endif
611}
612
613MY_INLINE int
614strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
615 const char *str, unsigned int length, unsigned int hash)
616{
617 assert (cache->case_insensitive);
618
619 /* the simple stuff first. */
620 if ( entry->hash != hash
621 || entry->length != length)
622 return 0;
623
624#if defined(_MSC_VER) || defined(__OS2__)
625 return _memicmp (entry + 1, str, length) == 0;
626#else
627 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
628#endif
629}
630
631static void
632strcache2_rehash (struct strcache2 *cache)
633{
634 unsigned int src = cache->hash_size;
635 struct strcache2_entry **src_tab = cache->hash_tab;
636 struct strcache2_entry **dst_tab;
637#ifndef STRCACHE2_USE_MASK
638 unsigned int hash_shift;
639#endif
640
641 /* Allocate a new hash table twice the size of the current. */
642 cache->hash_size <<= 1;
643#ifdef STRCACHE2_USE_MASK
644 cache->hash_mask <<= 1;
645 cache->hash_mask |= 1;
646#else
647 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
648 /* nothing */;
649 cache->hash_div = strcache2_find_prime (hash_shift);
650#endif
651 cache->rehash_count <<= 1;
652 cache->hash_tab = dst_tab = (struct strcache2_entry **)
653 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
654 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
655
656 /* Copy the entries from the old to the new hash table. */
657 cache->collision_count = 0;
658 while (src-- > 0)
659 {
660 struct strcache2_entry *entry = src_tab[src];
661 while (entry)
662 {
663 struct strcache2_entry *next = entry->next;
664 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
665 if ((entry->next = dst_tab[dst]) != 0)
666 cache->collision_count++;
667 dst_tab[dst] = entry;
668
669 entry = next;
670 }
671 }
672
673 /* That's it, just free the old table and we're done. */
674 free (src_tab);
675}
676
677static struct strcache2_seg *
678strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
679{
680 struct strcache2_seg *seg;
681 size_t size;
682 size_t off;
683
684 size = cache->def_seg_size;
685 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
686 {
687 size = (size_t)minlen * 2;
688 size = (size + 0xfff) & ~(size_t)0xfff;
689 }
690
691 seg = xmalloc (size);
692 seg->start = (char *)(seg + 1);
693 seg->size = size - sizeof (struct strcache2_seg);
694 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
695 if (off)
696 {
697 off = STRCACHE2_ENTRY_ALIGNMENT - off;
698 seg->start += off;
699 seg->size -= off;
700 }
701 assert (seg->size > minlen);
702 seg->cursor = seg->start;
703 seg->avail = seg->size;
704
705 seg->next = cache->seg_head;
706 cache->seg_head = seg;
707
708 return seg;
709}
710
711/* Internal worker that enters a new string into the cache. */
712static const char *
713strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
714 const char *str, unsigned int length,
715 unsigned int hash)
716{
717 struct strcache2_entry *entry;
718 struct strcache2_seg *seg;
719 unsigned int size;
720 char *str_copy;
721
722 /* Allocate space for the string. */
723
724 size = length + 1 + sizeof (struct strcache2_entry);
725 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
726
727 seg = cache->seg_head;
728 if (MY_PREDICT_FALSE(seg->avail < size))
729 {
730 do
731 seg = seg->next;
732 while (seg && seg->avail < size);
733 if (!seg)
734 seg = strcache2_new_seg (cache, size);
735 }
736
737 entry = (struct strcache2_entry *) seg->cursor;
738 assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
739 seg->cursor += size;
740 seg->avail -= size;
741
742 /* Setup the entry, copy the string and insert it into the hash table. */
743
744 entry->user = NULL;
745 entry->length = length;
746 entry->hash = hash;
747 str_copy = (char *) memcpy (entry + 1, str, length);
748 str_copy[length] = '\0';
749
750 if ((entry->next = cache->hash_tab[idx]) != 0)
751 cache->collision_count++;
752 cache->hash_tab[idx] = entry;
753 cache->count++;
754 if (cache->count >= cache->rehash_count)
755 strcache2_rehash (cache);
756
757 return str_copy;
758}
759
760/* The public add string interface. */
761const char *
762strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
763{
764 struct strcache2_entry const *entry;
765 unsigned int hash = strcache2_case_sensitive_hash (str, length);
766 unsigned int idx;
767
768 assert (!cache->case_insensitive);
769 assert (!memchr (str, '\0', length));
770
771 MAKE_STATS (cache->lookup_count++);
772
773 /* Lookup the entry in the hash table, hoping for an
774 early match. If not found, enter the string at IDX. */
775 idx = STRCACHE2_MOD_IT (cache, hash);
776 entry = cache->hash_tab[idx];
777 if (!entry)
778 return strcache2_enter_string (cache, idx, str, length, hash);
779 if (strcache2_is_equal (cache, entry, str, length, hash))
780 return (const char *)(entry + 1);
781 MAKE_STATS (cache->collision_1st_count++);
782
783 entry = entry->next;
784 if (!entry)
785 return strcache2_enter_string (cache, idx, str, length, hash);
786 if (strcache2_is_equal (cache, entry, str, length, hash))
787 return (const char *)(entry + 1);
788 MAKE_STATS (cache->collision_2nd_count++);
789
790 /* Loop the rest. */
791 for (;;)
792 {
793 entry = entry->next;
794 if (!entry)
795 return strcache2_enter_string (cache, idx, str, length, hash);
796 if (strcache2_is_equal (cache, entry, str, length, hash))
797 return (const char *)(entry + 1);
798 MAKE_STATS (cache->collision_3rd_count++);
799 }
800 /* not reached */
801}
802
803/* The public add string interface for prehashed strings.
804 Use strcache2_hash_str to calculate the hash of a string. */
805const char *
806strcache2_add_hashed (struct strcache2 *cache, const char *str,
807 unsigned int length, unsigned int hash)
808{
809 struct strcache2_entry const *entry;
810 unsigned int idx;
811#ifndef NDEBUG
812 unsigned correct_hash;
813
814 assert (!cache->case_insensitive);
815 assert (!memchr (str, '\0', length));
816 correct_hash = strcache2_case_sensitive_hash (str, length);
817 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
818#endif /* NDEBUG */
819
820 MAKE_STATS (cache->lookup_count++);
821
822 /* Lookup the entry in the hash table, hoping for an
823 early match. If not found, enter the string at IDX. */
824 idx = STRCACHE2_MOD_IT (cache, hash);
825 entry = cache->hash_tab[idx];
826 if (!entry)
827 return strcache2_enter_string (cache, idx, str, length, hash);
828 if (strcache2_is_equal (cache, entry, str, length, hash))
829 return (const char *)(entry + 1);
830 MAKE_STATS (cache->collision_1st_count++);
831
832 entry = entry->next;
833 if (!entry)
834 return strcache2_enter_string (cache, idx, str, length, hash);
835 if (strcache2_is_equal (cache, entry, str, length, hash))
836 return (const char *)(entry + 1);
837 MAKE_STATS (cache->collision_2nd_count++);
838
839 /* Loop the rest. */
840 for (;;)
841 {
842 entry = entry->next;
843 if (!entry)
844 return strcache2_enter_string (cache, idx, str, length, hash);
845 if (strcache2_is_equal (cache, entry, str, length, hash))
846 return (const char *)(entry + 1);
847 MAKE_STATS (cache->collision_3rd_count++);
848 }
849 /* not reached */
850}
851
852/* The public lookup (case sensitive) string interface. */
853const char *
854strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
855{
856 struct strcache2_entry const *entry;
857 unsigned int hash = strcache2_case_sensitive_hash (str, length);
858 unsigned int idx;
859
860 assert (!cache->case_insensitive);
861 assert (!memchr (str, '\0', length));
862
863 MAKE_STATS (cache->lookup_count++);
864
865 /* Lookup the entry in the hash table, hoping for an
866 early match. */
867 idx = STRCACHE2_MOD_IT (cache, hash);
868 entry = cache->hash_tab[idx];
869 if (!entry)
870 return NULL;
871 if (strcache2_is_equal (cache, entry, str, length, hash))
872 return (const char *)(entry + 1);
873 MAKE_STATS (cache->collision_1st_count++);
874
875 entry = entry->next;
876 if (!entry)
877 return NULL;
878 if (strcache2_is_equal (cache, entry, str, length, hash))
879 return (const char *)(entry + 1);
880 MAKE_STATS (cache->collision_2nd_count++);
881
882 /* Loop the rest. */
883 for (;;)
884 {
885 entry = entry->next;
886 if (!entry)
887 return NULL;
888 if (strcache2_is_equal (cache, entry, str, length, hash))
889 return (const char *)(entry + 1);
890 MAKE_STATS (cache->collision_3rd_count++);
891 }
892 /* not reached */
893}
894
895#if defined(HAVE_CASE_INSENSITIVE_FS)
896
897/* The public add string interface for case insensitive strings. */
898const char *
899strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
900{
901 struct strcache2_entry const *entry;
902 unsigned int hash = strcache2_case_insensitive_hash (str, length);
903 unsigned int idx;
904
905 assert (cache->case_insensitive);
906 assert (!memchr (str, '\0', length));
907
908 MAKE_STATS (cache->lookup_count++);
909
910 /* Lookup the entry in the hash table, hoping for an
911 early match. If not found, enter the string at IDX. */
912 idx = STRCACHE2_MOD_IT (cache, hash);
913 entry = cache->hash_tab[idx];
914 if (!entry)
915 return strcache2_enter_string (cache, idx, str, length, hash);
916 if (strcache2_is_equal (cache, entry, str, length, hash))
917 return (const char *)(entry + 1);
918 MAKE_STATS (cache->collision_1st_count++);
919
920 entry = entry->next;
921 if (!entry)
922 return strcache2_enter_string (cache, idx, str, length, hash);
923 if (strcache2_is_equal (cache, entry, str, length, hash))
924 return (const char *)(entry + 1);
925 MAKE_STATS (cache->collision_2nd_count++);
926
927 /* Loop the rest. */
928 for (;;)
929 {
930 entry = entry->next;
931 if (!entry)
932 return strcache2_enter_string (cache, idx, str, length, hash);
933 if (strcache2_is_equal (cache, entry, str, length, hash))
934 return (const char *)(entry + 1);
935 MAKE_STATS (cache->collision_3rd_count++);
936 }
937 /* not reached */
938}
939
940/* The public add string interface for prehashed case insensitive strings.
941 Use strcache2_hash_istr to calculate the hash of a string. */
942const char *
943strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
944 unsigned int length, unsigned int hash)
945{
946 struct strcache2_entry const *entry;
947 unsigned int idx;
948#ifndef NDEBUG
949 unsigned correct_hash;
950
951 assert (cache->case_insensitive);
952 assert (!memchr (str, '\0', length));
953 correct_hash = strcache2_case_insensitive_hash (str, length);
954 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
955#endif /* NDEBUG */
956
957 MAKE_STATS (cache->lookup_count++);
958
959 /* Lookup the entry in the hash table, hoping for an
960 early match. If not found, enter the string at IDX. */
961 idx = STRCACHE2_MOD_IT (cache, hash);
962 entry = cache->hash_tab[idx];
963 if (!entry)
964 return strcache2_enter_string (cache, idx, str, length, hash);
965 if (strcache2_is_equal (cache, entry, str, length, hash))
966 return (const char *)(entry + 1);
967 MAKE_STATS (cache->collision_1st_count++);
968
969 entry = entry->next;
970 if (!entry)
971 return strcache2_enter_string (cache, idx, str, length, hash);
972 if (strcache2_is_equal (cache, entry, str, length, hash))
973 return (const char *)(entry + 1);
974 MAKE_STATS (cache->collision_2nd_count++);
975
976 /* Loop the rest. */
977 for (;;)
978 {
979 entry = entry->next;
980 if (!entry)
981 return strcache2_enter_string (cache, idx, str, length, hash);
982 if (strcache2_is_equal (cache, entry, str, length, hash))
983 return (const char *)(entry + 1);
984 MAKE_STATS (cache->collision_3rd_count++);
985 }
986 /* not reached */
987}
988
989/* The public lookup (case insensitive) string interface. */
990const char *
991strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
992{
993 struct strcache2_entry const *entry;
994 unsigned int hash = strcache2_case_insensitive_hash (str, length);
995 unsigned int idx;
996
997 assert (cache->case_insensitive);
998 assert (!memchr (str, '\0', length));
999
1000 MAKE_STATS (cache->lookup_count++);
1001
1002 /* Lookup the entry in the hash table, hoping for an
1003 early match. */
1004 idx = STRCACHE2_MOD_IT (cache, hash);
1005 entry = cache->hash_tab[idx];
1006 if (!entry)
1007 return NULL;
1008 if (strcache2_is_equal (cache, entry, str, length, hash))
1009 return (const char *)(entry + 1);
1010 MAKE_STATS (cache->collision_1st_count++);
1011
1012 entry = entry->next;
1013 if (!entry)
1014 return NULL;
1015 if (strcache2_is_equal (cache, entry, str, length, hash))
1016 return (const char *)(entry + 1);
1017 MAKE_STATS (cache->collision_2nd_count++);
1018
1019 /* Loop the rest. */
1020 for (;;)
1021 {
1022 entry = entry->next;
1023 if (!entry)
1024 return NULL;
1025 if (strcache2_is_equal (cache, entry, str, length, hash))
1026 return (const char *)(entry + 1);
1027 MAKE_STATS (cache->collision_3rd_count++);
1028 }
1029 /* not reached */
1030}
1031
1032#endif /* HAVE_CASE_INSENSITIVE_FS */
1033
1034/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1035int
1036strcache2_is_cached (struct strcache2 *cache, const char *str)
1037{
1038 /* Check mandatory alignment first. */
1039 if (!((size_t)str & (sizeof (void *) - 1)))
1040 {
1041 /* Check the segment list and consider the question answered if the
1042 string is within one of them. (Could check it more thoroughly...) */
1043 struct strcache2_seg const *seg;
1044 for (seg = cache->seg_head; seg; seg = seg->next)
1045 if ((size_t)(str - seg->start) < seg->size)
1046 return 1;
1047 }
1048
1049 return 0;
1050}
1051
1052
1053/* Verify the integrity of the specified string, returning 0 if OK. */
1054int
1055strcache2_verify_entry (struct strcache2 *cache, const char *str)
1056{
1057 struct strcache2_entry const *entry;
1058 unsigned int hash;
1059 unsigned int length;
1060 const char *end;
1061
1062 entry = (struct strcache2_entry const *)str - 1;
1063 if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
1064 {
1065 fprintf (stderr,
1066 "strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1067 cache->name, (void *)entry, (void *)str, str);
1068 return -1;
1069 }
1070
1071 end = memchr (str, '\0', entry->length + 1);
1072 length = end - str;
1073 if (length != entry->length)
1074 {
1075 fprintf (stderr,
1076 "strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1077 cache->name, (void *)entry, length, entry->length, str);
1078 return -1;
1079 }
1080
1081 hash = cache->case_insensitive
1082 ? strcache2_case_insensitive_hash (str, entry->length)
1083 : strcache2_case_sensitive_hash (str, entry->length);
1084 if (hash != entry->hash)
1085 {
1086 fprintf (stderr,
1087 "strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1088 cache->name, (void *)entry, hash, entry->hash, str);
1089 return -1;
1090 }
1091
1092 return 0;
1093}
1094
1095
1096/* Calculates the case sensitive hash values of the string.
1097 The first hash is returned, the other is put at HASH2P. */
1098unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1099{
1100 *hash2p = 1;
1101 return strcache2_case_sensitive_hash (str, length);
1102}
1103
1104/* Calculates the case insensitive hash values of the string.
1105 The first hash is returned, the other is put at HASH2P. */
1106unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1107{
1108 *hash2p = 1;
1109 return strcache2_case_insensitive_hash (str, length);
1110}
1111
1112
1113
1114/* Initalizes a new cache. */
1115void
1116strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1117 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1118{
1119 unsigned hash_shift;
1120 assert (!thread_safe);
1121
1122 /* calc the size as a power of two */
1123 if (!size)
1124 hash_shift = STRCACHE2_HASH_SHIFT;
1125 else
1126 {
1127 assert (size <= (~0U / 2 + 1));
1128 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1129 /* nothing */;
1130 }
1131
1132 /* adjust the default segment size */
1133 if (!def_seg_size)
1134 def_seg_size = STRCACHE2_SEG_SIZE;
1135 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1136 def_seg_size = sizeof (struct strcache2_seg) * 10;
1137 else if ((def_seg_size & 0xfff) < 0xf00)
1138 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1139
1140
1141 /* init the structure. */
1142 cache->case_insensitive = case_insensitive;
1143#ifdef STRCACHE2_USE_MASK
1144 cache->hash_mask = (1U << hash_shift) - 1U;
1145#else
1146 cache->hash_div = strcache2_find_prime(hash_shift);
1147#endif
1148 cache->count = 0;
1149 cache->collision_count = 0;
1150 cache->lookup_count = 0;
1151 cache->collision_1st_count = 0;
1152 cache->collision_2nd_count = 0;
1153 cache->collision_3rd_count = 0;
1154 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1155 cache->init_size = 1U << hash_shift;
1156 cache->hash_size = 1U << hash_shift;
1157 cache->def_seg_size = def_seg_size;
1158 cache->lock = NULL;
1159 cache->name = name;
1160
1161 /* allocate the hash table and first segment. */
1162 cache->hash_tab = (struct strcache2_entry **)
1163 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1164 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1165 strcache2_new_seg (cache, 0);
1166
1167 /* link it */
1168 cache->next = strcache_head;
1169 strcache_head = cache;
1170}
1171
1172
1173/* Terminates a string cache, freeing all memory and other
1174 associated resources. */
1175void
1176strcache2_term (struct strcache2 *cache)
1177{
1178 /* unlink it */
1179 if (strcache_head == cache)
1180 strcache_head = cache->next;
1181 else
1182 {
1183 struct strcache2 *prev = strcache_head;
1184 while (prev->next != cache)
1185 prev = prev->next;
1186 assert (prev);
1187 prev->next = cache->next;
1188 }
1189
1190 /* free the memory segments */
1191 do
1192 {
1193 void *free_it = cache->seg_head;
1194 cache->seg_head = cache->seg_head->next;
1195 free (free_it);
1196 }
1197 while (cache->seg_head);
1198
1199 /* free the hash and clear the structure. */
1200 free (cache->hash_tab);
1201 memset (cache, '\0', sizeof (struct strcache2));
1202}
1203
1204/* Print statistics a string cache. */
1205void
1206strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1207{
1208 unsigned int seg_count = 0;
1209 unsigned long seg_total_bytes = 0;
1210 unsigned long seg_avg_bytes;
1211 unsigned long seg_avail_bytes = 0;
1212 unsigned long seg_max_bytes = 0;
1213 struct strcache2_seg *seg;
1214 unsigned int str_count = 0;
1215 unsigned long str_total_len = 0;
1216 unsigned int str_avg_len;
1217 unsigned int str_min_len = ~0U;
1218 unsigned int str_max_len = 0;
1219 unsigned int idx;
1220 unsigned int rehashes;
1221 unsigned int chain_depths[32];
1222
1223 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1224
1225 /* Segment statistics. */
1226 for (seg = cache->seg_head; seg; seg = seg->next)
1227 {
1228 seg_count++;
1229 seg_total_bytes += seg->size;
1230 seg_avail_bytes += seg->avail;
1231 if (seg->size > seg_max_bytes)
1232 seg_max_bytes = seg->size;
1233 }
1234 seg_avg_bytes = seg_total_bytes / seg_count;
1235 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1236 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1237 cache->def_seg_size, seg_avail_bytes);
1238
1239 /* String statistics. */
1240 memset (chain_depths, '\0', sizeof (chain_depths));
1241 idx = cache->hash_size;
1242 while (idx-- > 0)
1243 {
1244 struct strcache2_entry const *entry = cache->hash_tab[idx];
1245 unsigned int depth = 0;
1246 for (; entry != 0; entry = entry->next, depth++)
1247 {
1248 unsigned int length = entry->length;
1249 str_total_len += length;
1250 if (length > str_max_len)
1251 str_max_len = length;
1252 if (length < str_min_len)
1253 str_min_len = length;
1254 str_count++;
1255 }
1256 chain_depths[depth >= 32 ? 31 : depth]++;
1257 }
1258 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1259 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1260 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1261 if (str_count != cache->count)
1262 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1263 cache->count, str_count);
1264
1265 /* Hash statistics. */
1266 idx = cache->init_size;
1267 rehashes = 0;
1268 while (idx < cache->hash_size)
1269 {
1270 idx *= 2;
1271 rehashes++;
1272 }
1273
1274#ifdef STRCACHE2_USE_MASK
1275 printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1276 prefix, cache->hash_size, cache->hash_mask, rehashes);
1277#else
1278 printf (_("%s hash size = %u div = %#x rehashed %u times"),
1279 prefix, cache->hash_size, cache->hash_div, rehashes);
1280#endif
1281 if (cache->lookup_count)
1282 printf (_("%s lookups = %lu\n"
1283 "%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1284 prefix, cache->lookup_count,
1285 prefix,
1286 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1287 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1288 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1289 printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1290 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1291 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1292 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1293 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1294 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1295 for (idx = 2; idx < 32; idx++)
1296 {
1297 unsigned strs_at_this_depth = chain_depths[idx];
1298 unsigned i;
1299 for (i = idx + 1; i < 32; i++)
1300 strs_at_this_depth += chain_depths[i];
1301 if (strs_at_this_depth)
1302 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1303 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1304 idx - 1, idx == 2 ? " " : "s",
1305 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1306 }
1307}
1308
1309/* Print statistics for all string caches. */
1310void
1311strcache2_print_stats_all (const char *prefix)
1312{
1313 struct strcache2 *cur;
1314 for (cur = strcache_head; cur; cur = cur->next)
1315 strcache2_print_stats (cur, prefix);
1316}
1317
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette