VirtualBox

source: kBuild/trunk/src/kmk/strcache2.c@ 1921

Last change on this file since 1921 was 1921, checked in by bird, 16 years ago

strcache2: disabled the experimental stuff because I'm unsure about the actual gain on all CPUs.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 38.2 KB
Line 
1/* $Id: strcache2.c 1921 2008-10-24 00:30:58Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <[email protected]>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef _MSC_VER
39typedef unsigned char uint8_t;
40typedef unsigned short uint16_t;
41typedef unsigned int uint32_t;
42typedef signed char int8_t;
43typedef signed short int16_t;
44typedef signed int int32_t;
45#else
46# include <stdint.h>
47#endif
48
49#ifdef WINDOWS32
50# include <io.h>
51# include <process.h>
52# include <Windows.h>
53# define PARSE_IN_WORKER
54#endif
55
56#ifdef __OS2__
57# include <sys/fmutex.h>
58#endif
59
60#ifdef HAVE_PTHREAD
61# include <pthread.h>
62#endif
63
64
65/*******************************************************************************
66* Defined Constants And Macros *
67*******************************************************************************/
68/* The default size of a memory segment (1MB). */
69#define STRCACHE2_SEG_SIZE (1024U*1024U)
70/* The default hash table shift (hash size give as a power of two). */
71#define STRCACHE2_HASH_SHIFT 16
72/** Does the modding / masking of a hash number into an index. */
73#ifdef STRCACHE2_USE_MASK
74# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
75#else
76# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
77#endif
78
79# if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
80 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
81# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
82# else
83 /* (endian doesn't matter) */
84# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
85 | (((const uint8_t *)(ptr))[1]) )
86# endif
87
88
89/*******************************************************************************
90* Global Variables *
91*******************************************************************************/
92/* List of initialized string caches. */
93static struct strcache2 *strcache_head;
94
95
96/** Finds the closest primary number for power of two value (or something else
97 * useful if not support). */
98MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
99{
100 switch (shift)
101 {
102 case 5: return 31;
103 case 6: return 61;
104 case 7: return 127;
105 case 8: return 251;
106 case 9: return 509;
107 case 10: return 1021;
108 case 11: return 2039;
109 case 12: return 4093;
110 case 13: return 8191;
111 case 14: return 16381;
112 case 15: return 32749;
113 case 16: return 65521;
114 case 17: return 131063;
115 case 18: return 262139;
116 case 19: return 524269;
117 case 20: return 1048573;
118 case 21: return 2097143;
119 case 22: return 4194301;
120 case 23: return 8388593;
121
122 default:
123 assert (0);
124 return (1 << shift) - 1;
125 }
126}
127
128/* The following is a bit experiment. It produces longer chains, i.e. worse
129 distribution of the strings in the table, however the actual make
130 performances is better (<time). The explanation is probably that the
131 collisions only really increase for entries that aren't looked up that
132 much and that it actually improoves the situation for those that is. Or
133 that we spend so much less time hashing that it makes up (and more) for
134 the pentalty we suffer from the longer chains and worse distribution.
135
136 XXX: Check how this works out with different path lengths. I suspect it
137 might depend on the length of PATH_ROOT and the depth of the files
138 in the project as well. If it does, this might make matters worse
139 for some and better for others which isn't very cool... */
140
141#if 0
142# define BIG_HASH_SIZE 32 /* kinda fast */
143# define BIG_HASH_HEAD 16
144# define BIG_HASH_TAIL 12
145#elif 0
146# define BIG_HASH_SIZE 68 /* kinda safe */
147# define BIG_HASH_HEAD 24
148# define BIG_HASH_TAIL 24
149#elif 0
150# define BIG_HASH_SIZE 128 /* safe */
151# define BIG_HASH_HEAD 32
152# define BIG_HASH_TAIL 32
153#endif
154
155#ifdef BIG_HASH_SIZE
156/* long string: hash head and tail, drop the middle. */
157MY_INLINE unsigned int
158strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
159{
160 uint32_t hash = len;
161 uint32_t tmp;
162 unsigned int head;
163
164 /* head BIG_HASH_HEAD bytes */
165 head = (BIG_HASH_HEAD >> 2);
166 while (head-- > 0)
167 {
168 hash += strcache2_get_unaligned_16bits (str);
169 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
170 hash = (hash << 16) ^ tmp;
171 str += 2 * sizeof (uint16_t);
172 hash += hash >> 11;
173 }
174
175 /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
176 str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
177 head = (BIG_HASH_TAIL >> 2);
178 while (head-- > 0)
179 {
180 hash += strcache2_get_unaligned_16bits (str);
181 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
182 hash = (hash << 16) ^ tmp;
183 str += 2 * sizeof (uint16_t);
184 hash += hash >> 11;
185 }
186
187 /* force "avalanching" of final 127 bits. */
188 hash ^= hash << 3;
189 hash += hash >> 5;
190 hash ^= hash << 4;
191 hash += hash >> 17;
192 hash ^= hash << 25;
193 hash += hash >> 6;
194
195 return hash;
196}
197#endif /* BIG_HASH_SIZE */
198
199MY_INLINE unsigned int
200strcache2_case_sensitive_hash (const char *str, unsigned int len)
201{
202#if 1
203 /* Paul Hsieh hash SuperFast function:
204 http://www.azillionmonkeys.com/qed/hash.html
205
206 This performs very good and as a sligtly better distribution than
207 STRING_N_HASH_1 on a typical kBuild run.
208
209 It is also 37% faster than return_STRING_N_HASH_1 when running the
210 two 100 times over typical kBuild strings that end up here (did a
211 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
212 with -O2.
213
214 FIXME: A path for well aligned data should be added to speed up
215 execution on alignment sensitive systems. */
216 unsigned int rem;
217 uint32_t hash;
218 uint32_t tmp;
219
220 assert (sizeof (uint8_t) == sizeof (char));
221
222# ifdef BIG_HASH_SIZE
223 /* long string? */
224# if 0 /*BIG_HASH_SIZE > 128*/
225 if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
226# else
227 if (len >= BIG_HASH_SIZE)
228# endif
229 return strcache2_case_sensitive_hash_big (str, len);
230# endif
231
232 /* short string: main loop, walking on 2 x uint16_t */
233 hash = len;
234 rem = len & 3;
235 len >>= 2;
236 while (len > 0)
237 {
238 hash += strcache2_get_unaligned_16bits (str);
239 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
240 hash = (hash << 16) ^ tmp;
241 str += 2 * sizeof (uint16_t);
242 hash += hash >> 11;
243 len--;
244 }
245
246 /* the remainder */
247 switch (rem)
248 {
249 case 3:
250 hash += strcache2_get_unaligned_16bits (str);
251 hash ^= hash << 16;
252 hash ^= str[sizeof (uint16_t)] << 18;
253 hash += hash >> 11;
254 break;
255 case 2:
256 hash += strcache2_get_unaligned_16bits (str);
257 hash ^= hash << 11;
258 hash += hash >> 17;
259 break;
260 case 1:
261 hash += *str;
262 hash ^= hash << 10;
263 hash += hash >> 1;
264 break;
265 }
266
267 /* force "avalanching" of final 127 bits. */
268 hash ^= hash << 3;
269 hash += hash >> 5;
270 hash ^= hash << 4;
271 hash += hash >> 17;
272 hash ^= hash << 25;
273 hash += hash >> 6;
274
275 return hash;
276
277#elif 1
278 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
279 when running the two 100 times over typical kBuild strings that
280 end up here (did a fprintf here and built kBuild).
281 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
282
283 unsigned int hash = 0;
284 if (MY_PREDICT_TRUE(len >= 2))
285 {
286 unsigned int ch0 = *str++;
287 hash = 0;
288 len--;
289 while (len >= 2)
290 {
291 unsigned int ch1 = *str++;
292 hash += ch0 << (ch1 & 0xf);
293
294 ch0 = *str++;
295 hash += ch1 << (ch0 & 0xf);
296
297 len -= 2;
298 }
299 if (len == 1)
300 {
301 unsigned ch1 = *str;
302 hash += ch0 << (ch1 & 0xf);
303
304 hash += ch1;
305 }
306 else
307 hash += ch0;
308 }
309 else if (len)
310 {
311 hash = *str;
312 hash += hash << (hash & 0xf);
313 }
314 else
315 hash = 0;
316 return hash;
317
318#elif 1
319# if 0
320 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
321 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
322 first in the assignment made it noticably slower.)
323
324 However, it is noticably slower in practice, most likely because of more
325 collisions. Hrmpf. */
326
327# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
328 unsigned int hash = 0;
329
330# else
331 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
332 fast than return_STRING_N_HASH_1.
333
334 Ditto. */
335
336# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
337 unsigned int hash = 5381;
338# endif
339
340
341 while (len >= 4)
342 {
343 UPDATE_HASH (str[0]);
344 UPDATE_HASH (str[1]);
345 UPDATE_HASH (str[2]);
346 UPDATE_HASH (str[3]);
347 str += 4;
348 len -= 4;
349 }
350 switch (len)
351 {
352 default:
353 case 0:
354 return hash;
355 case 1:
356 UPDATE_HASH (str[0]);
357 return hash;
358 case 2:
359 UPDATE_HASH (str[0]);
360 UPDATE_HASH (str[1]);
361 return hash;
362 case 3:
363 UPDATE_HASH (str[0]);
364 UPDATE_HASH (str[1]);
365 UPDATE_HASH (str[2]);
366 return hash;
367 }
368#endif
369}
370
371MY_INLINE unsigned int
372strcache2_case_insensitive_hash (const char *str, unsigned int len)
373{
374 unsigned int hash = 0;
375 if (MY_PREDICT_TRUE(len >= 2))
376 {
377 unsigned int ch0 = *str++;
378 ch0 = tolower (ch0);
379 hash = 0;
380 len--;
381 while (len >= 2)
382 {
383 unsigned int ch1 = *str++;
384 ch1 = tolower (ch1);
385 hash += ch0 << (ch1 & 0xf);
386
387 ch0 = *str++;
388 ch0 = tolower (ch0);
389 hash += ch1 << (ch0 & 0xf);
390
391 len -= 2;
392 }
393 if (len == 1)
394 {
395 unsigned ch1 = *str;
396 ch1 = tolower (ch1);
397 hash += ch0 << (ch1 & 0xf);
398
399 hash += ch1;
400 }
401 else
402 hash += ch0;
403 }
404 else if (len)
405 {
406 hash = *str;
407 hash += hash << (hash & 0xf);
408 }
409 else
410 hash = 0;
411 return hash;
412}
413
414MY_INLINE int
415strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
416{
417 if (length <= 8)
418 {
419 /* short string compare - ~50% of the kBuild calls. */
420 assert ( !((size_t)ys & 3) );
421 if (!((size_t)xs & 3))
422 {
423 /* aligned */
424 int result;
425 switch (length)
426 {
427 default: /* memcmp for longer strings */
428 return memcmp (xs, ys, length);
429 case 8:
430 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
431 result |= *(int32_t*)xs - *(int32_t*)ys;
432 return result;
433 case 7:
434 result = xs[6] - ys[6];
435 result |= xs[5] - ys[5];
436 result |= xs[4] - ys[4];
437 result |= *(int32_t*)xs - *(int32_t*)ys;
438 return result;
439 case 6:
440 result = xs[5] - ys[5];
441 result |= xs[4] - ys[4];
442 result |= *(int32_t*)xs - *(int32_t*)ys;
443 return result;
444 case 5:
445 result = xs[4] - ys[4];
446 result |= *(int32_t*)xs - *(int32_t*)ys;
447 return result;
448 case 4:
449 return *(int32_t*)xs - *(int32_t*)ys;
450 case 3:
451 result = xs[2] - ys[2];
452 result |= xs[1] - ys[1];
453 result |= xs[0] - ys[0];
454 return result;
455 case 2:
456 result = xs[1] - ys[1];
457 result |= xs[0] - ys[0];
458 return result;
459 case 1:
460 return *xs - *ys;
461 case 0:
462 return 0;
463 }
464 }
465 else
466 {
467 /* unaligned */
468 int result = 0;
469 switch (length)
470 {
471 case 8: result |= xs[7] - ys[7];
472 case 7: result |= xs[6] - ys[6];
473 case 6: result |= xs[5] - ys[5];
474 case 5: result |= xs[4] - ys[4];
475 case 4: result |= xs[3] - ys[3];
476 case 3: result |= xs[2] - ys[2];
477 case 2: result |= xs[1] - ys[1];
478 case 1: result |= xs[0] - ys[0];
479 case 0:
480 return result;
481 }
482 }
483 }
484
485 /* memcmp for longer strings */
486 return memcmp (xs, ys, length);
487}
488
489MY_INLINE int
490strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
491{
492#ifndef ELECTRIC_HEAP
493 assert ( !((size_t)ys & 3) );
494#endif
495 if (!((size_t)xs & 3))
496 {
497 int result;
498 /* aligned */
499 while (length >= 8)
500 {
501 result = *(int32_t*)xs - *(int32_t*)ys;
502 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
503 if (MY_PREDICT_FALSE(result))
504 return result;
505 xs += 8;
506 ys += 8;
507 length -= 8;
508 }
509 switch (length)
510 {
511 case 7:
512 result = *(int32_t*)xs - *(int32_t*)ys;
513 result |= xs[6] - ys[6];
514 result |= xs[5] - ys[5];
515 result |= xs[4] - ys[4];
516 return result;
517 case 6:
518 result = *(int32_t*)xs - *(int32_t*)ys;
519 result |= xs[5] - ys[5];
520 result |= xs[4] - ys[4];
521 return result;
522 case 5:
523 result = *(int32_t*)xs - *(int32_t*)ys;
524 result |= xs[4] - ys[4];
525 return result;
526 case 4:
527 return *(int32_t*)xs - *(int32_t*)ys;
528 case 3:
529 result = xs[2] - ys[2];
530 result |= xs[1] - ys[1];
531 result |= xs[0] - ys[0];
532 return result;
533 case 2:
534 result = xs[1] - ys[1];
535 result |= xs[0] - ys[0];
536 return result;
537 case 1:
538 return *xs - *ys;
539 default:
540 case 0:
541 return 0;
542 }
543 }
544 else
545 {
546 /* unaligned */
547 int result;
548 while (length >= 8)
549 {
550#if defined(__i386__) || defined(__x86_64__)
551 result = ( ((int32_t)xs[3] << 24)
552 | ((int32_t)xs[2] << 16)
553 | ((int32_t)xs[1] << 8)
554 | xs[0] )
555 - *(int32_t*)ys;
556 result |= ( ((int32_t)xs[7] << 24)
557 | ((int32_t)xs[6] << 16)
558 | ((int32_t)xs[5] << 8)
559 | xs[4] )
560 - *(int32_t*)(ys + 4);
561#else
562 result = xs[3] - ys[3];
563 result |= xs[2] - ys[2];
564 result |= xs[1] - ys[1];
565 result |= xs[0] - ys[0];
566 result |= xs[7] - ys[7];
567 result |= xs[6] - ys[6];
568 result |= xs[5] - ys[5];
569 result |= xs[4] - ys[4];
570#endif
571 if (MY_PREDICT_FALSE(result))
572 return result;
573 xs += 8;
574 ys += 8;
575 length -= 8;
576 }
577 result = 0;
578 switch (length)
579 {
580 case 7: result |= xs[6] - ys[6];
581 case 6: result |= xs[5] - ys[5];
582 case 5: result |= xs[4] - ys[4];
583 case 4: result |= xs[3] - ys[3];
584 case 3: result |= xs[2] - ys[2];
585 case 2: result |= xs[1] - ys[1];
586 case 1: result |= xs[0] - ys[0];
587 return result;
588 default:
589 case 0:
590 return 0;
591 }
592 }
593}
594
595MY_INLINE int
596strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
597 const char *str, unsigned int length, unsigned int hash)
598{
599 assert (!cache->case_insensitive);
600
601 /* the simple stuff first. */
602 if ( entry->hash != hash
603 || entry->length != length)
604 return 0;
605
606#if 0
607 return memcmp (str, entry + 1, length) == 0;
608#elif 1
609 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
610#else
611 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
612#endif
613}
614
615MY_INLINE int
616strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
617 const char *str, unsigned int length, unsigned int hash)
618{
619 assert (cache->case_insensitive);
620
621 /* the simple stuff first. */
622 if ( entry->hash != hash
623 || entry->length != length)
624 return 0;
625
626#if defined(_MSC_VER) || defined(__OS2__)
627 return _memicmp (entry + 1, str, length) == 0;
628#else
629 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
630#endif
631}
632
633static void
634strcache2_rehash (struct strcache2 *cache)
635{
636 unsigned int src = cache->hash_size;
637 struct strcache2_entry **src_tab = cache->hash_tab;
638 struct strcache2_entry **dst_tab;
639#ifndef STRCACHE2_USE_MASK
640 unsigned int hash_shift;
641#endif
642
643 /* Allocate a new hash table twice the size of the current. */
644 cache->hash_size <<= 1;
645#ifdef STRCACHE2_USE_MASK
646 cache->hash_mask <<= 1;
647 cache->hash_mask |= 1;
648#else
649 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
650 /* nothing */;
651 cache->hash_div = strcache2_find_prime (hash_shift);
652#endif
653 cache->rehash_count <<= 1;
654 cache->hash_tab = dst_tab = (struct strcache2_entry **)
655 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
656 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
657
658 /* Copy the entries from the old to the new hash table. */
659 cache->collision_count = 0;
660 while (src-- > 0)
661 {
662 struct strcache2_entry *entry = src_tab[src];
663 while (entry)
664 {
665 struct strcache2_entry *next = entry->next;
666 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
667 if ((entry->next = dst_tab[dst]) != 0)
668 cache->collision_count++;
669 dst_tab[dst] = entry;
670
671 entry = next;
672 }
673 }
674
675 /* That's it, just free the old table and we're done. */
676 free (src_tab);
677}
678
679static struct strcache2_seg *
680strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
681{
682 struct strcache2_seg *seg;
683 size_t size;
684 size_t off;
685
686 size = cache->def_seg_size;
687 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
688 {
689 size = (size_t)minlen * 2;
690 size = (size + 0xfff) & ~(size_t)0xfff;
691 }
692
693 seg = xmalloc (size);
694 seg->start = (char *)(seg + 1);
695 seg->size = size - sizeof (struct strcache2_seg);
696 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
697 if (off)
698 {
699 seg->start += off;
700 seg->size -= off;
701 }
702 assert (seg->size > minlen);
703 seg->cursor = seg->start;
704 seg->avail = seg->size;
705
706 seg->next = cache->seg_head;
707 cache->seg_head = seg;
708
709 return seg;
710}
711
712/* Internal worker that enters a new string into the cache. */
713static const char *
714strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
715 const char *str, unsigned int length,
716 unsigned int hash)
717{
718 struct strcache2_entry *entry;
719 struct strcache2_seg *seg;
720 unsigned int size;
721 char *str_copy;
722
723 /* Allocate space for the string. */
724
725 size = length + 1 + sizeof (struct strcache2_entry);
726 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
727
728 seg = cache->seg_head;
729 if (MY_PREDICT_FALSE(seg->avail < size))
730 {
731 do
732 seg = seg->next;
733 while (seg && seg->avail < size);
734 if (!seg)
735 seg = strcache2_new_seg (cache, size);
736 }
737
738 entry = (struct strcache2_entry *) seg->cursor;
739 assert ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1));
740 seg->cursor += size;
741 seg->avail -= size;
742
743 /* Setup the entry, copy the string and insert it into the hash table. */
744
745 entry->user = NULL;
746 entry->length = length;
747 entry->hash = hash;
748 str_copy = (char *) memcpy (entry + 1, str, length);
749 str_copy[length] = '\0';
750
751 if ((entry->next = cache->hash_tab[idx]) != 0)
752 cache->collision_count++;
753 cache->hash_tab[idx] = entry;
754 cache->count++;
755 if (cache->count >= cache->rehash_count)
756 strcache2_rehash (cache);
757
758 return str_copy;
759}
760
761/* The public add string interface. */
762const char *
763strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
764{
765 struct strcache2_entry const *entry;
766 unsigned int hash = strcache2_case_sensitive_hash (str, length);
767 unsigned int idx;
768
769 assert (!cache->case_insensitive);
770
771 MAKE_STATS (cache->lookup_count++);
772
773 /* Lookup the entry in the hash table, hoping for an
774 early match. If not found, enter the string at IDX. */
775 idx = STRCACHE2_MOD_IT (cache, hash);
776 entry = cache->hash_tab[idx];
777 if (!entry)
778 return strcache2_enter_string (cache, idx, str, length, hash);
779 if (strcache2_is_equal (cache, entry, str, length, hash))
780 return (const char *)(entry + 1);
781 MAKE_STATS (cache->collision_1st_count++);
782
783 entry = entry->next;
784 if (!entry)
785 return strcache2_enter_string (cache, idx, str, length, hash);
786 if (strcache2_is_equal (cache, entry, str, length, hash))
787 return (const char *)(entry + 1);
788 MAKE_STATS (cache->collision_2nd_count++);
789
790 /* Loop the rest. */
791 for (;;)
792 {
793 entry = entry->next;
794 if (!entry)
795 return strcache2_enter_string (cache, idx, str, length, hash);
796 if (strcache2_is_equal (cache, entry, str, length, hash))
797 return (const char *)(entry + 1);
798 MAKE_STATS (cache->collision_3rd_count++);
799 }
800 /* not reached */
801}
802
803/* The public add string interface for prehashed strings.
804 Use strcache2_hash_str to calculate the hash of a string. */
805const char *
806strcache2_add_hashed (struct strcache2 *cache, const char *str,
807 unsigned int length, unsigned int hash)
808{
809 struct strcache2_entry const *entry;
810 unsigned int idx;
811#ifndef NDEBUG
812 unsigned correct_hash;
813
814 assert (!cache->case_insensitive);
815 correct_hash = strcache2_case_sensitive_hash (str, length);
816 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
817#endif /* NDEBUG */
818
819 MAKE_STATS (cache->lookup_count++);
820
821 /* Lookup the entry in the hash table, hoping for an
822 early match. If not found, enter the string at IDX. */
823 idx = STRCACHE2_MOD_IT (cache, hash);
824 entry = cache->hash_tab[idx];
825 if (!entry)
826 return strcache2_enter_string (cache, idx, str, length, hash);
827 if (strcache2_is_equal (cache, entry, str, length, hash))
828 return (const char *)(entry + 1);
829 MAKE_STATS (cache->collision_1st_count++);
830
831 entry = entry->next;
832 if (!entry)
833 return strcache2_enter_string (cache, idx, str, length, hash);
834 if (strcache2_is_equal (cache, entry, str, length, hash))
835 return (const char *)(entry + 1);
836 MAKE_STATS (cache->collision_2nd_count++);
837
838 /* Loop the rest. */
839 for (;;)
840 {
841 entry = entry->next;
842 if (!entry)
843 return strcache2_enter_string (cache, idx, str, length, hash);
844 if (strcache2_is_equal (cache, entry, str, length, hash))
845 return (const char *)(entry + 1);
846 MAKE_STATS (cache->collision_3rd_count++);
847 }
848 /* not reached */
849}
850
851/* The public lookup (case sensitive) string interface. */
852const char *
853strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
854{
855 struct strcache2_entry const *entry;
856 unsigned int hash = strcache2_case_sensitive_hash (str, length);
857 unsigned int idx;
858
859 assert (!cache->case_insensitive);
860
861 MAKE_STATS (cache->lookup_count++);
862
863 /* Lookup the entry in the hash table, hoping for an
864 early match. */
865 idx = STRCACHE2_MOD_IT (cache, hash);
866 entry = cache->hash_tab[idx];
867 if (!entry)
868 return NULL;
869 if (strcache2_is_equal (cache, entry, str, length, hash))
870 return (const char *)(entry + 1);
871 MAKE_STATS (cache->collision_1st_count++);
872
873 entry = entry->next;
874 if (!entry)
875 return NULL;
876 if (strcache2_is_equal (cache, entry, str, length, hash))
877 return (const char *)(entry + 1);
878 MAKE_STATS (cache->collision_2nd_count++);
879
880 /* Loop the rest. */
881 for (;;)
882 {
883 entry = entry->next;
884 if (!entry)
885 return NULL;
886 if (strcache2_is_equal (cache, entry, str, length, hash))
887 return (const char *)(entry + 1);
888 MAKE_STATS (cache->collision_3rd_count++);
889 }
890 /* not reached */
891}
892
893#if defined(HAVE_CASE_INSENSITIVE_FS)
894
895/* The public add string interface for case insensitive strings. */
896const char *
897strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
898{
899 struct strcache2_entry const *entry;
900 unsigned int hash = strcache2_case_insensitive_hash (str, length);
901 unsigned int idx;
902
903 assert (!cache->case_insensitive);
904
905 MAKE_STATS (cache->lookup_count++);
906
907 /* Lookup the entry in the hash table, hoping for an
908 early match. If not found, enter the string at IDX. */
909 idx = STRCACHE2_MOD_IT (cache, hash);
910 entry = cache->hash_tab[idx];
911 if (!entry)
912 return strcache2_enter_string (cache, idx, str, length, hash);
913 if (strcache2_is_equal (cache, entry, str, length, hash))
914 return (const char *)(entry + 1);
915 MAKE_STATS (cache->collision_1st_count++);
916
917 entry = entry->next;
918 if (!entry)
919 return strcache2_enter_string (cache, idx, str, length, hash);
920 if (strcache2_is_equal (cache, entry, str, length, hash))
921 return (const char *)(entry + 1);
922 MAKE_STATS (cache->collision_2nd_count++);
923
924 /* Loop the rest. */
925 for (;;)
926 {
927 entry = entry->next;
928 if (!entry)
929 return strcache2_enter_string (cache, idx, str, length, hash);
930 if (strcache2_is_equal (cache, entry, str, length, hash))
931 return (const char *)(entry + 1);
932 MAKE_STATS (cache->collision_3rd_count++);
933 }
934 /* not reached */
935}
936
937/* The public add string interface for prehashed case insensitive strings.
938 Use strcache2_hash_istr to calculate the hash of a string. */
939const char *
940strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
941 unsigned int length, unsigned int hash)
942{
943 struct strcache2_entry const *entry;
944 unsigned int idx;
945#ifndef NDEBUG
946 unsigned correct_hash;
947
948 assert (!cache->case_insensitive);
949 correct_hash = strcache2_case_insensitive_hash (str, length);
950 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
951#endif /* NDEBUG */
952
953 MAKE_STATS (cache->lookup_count++);
954
955 /* Lookup the entry in the hash table, hoping for an
956 early match. If not found, enter the string at IDX. */
957 idx = STRCACHE2_MOD_IT (cache, hash);
958 entry = cache->hash_tab[idx];
959 if (!entry)
960 return strcache2_enter_string (cache, idx, str, length, hash);
961 if (strcache2_is_equal (cache, entry, str, length, hash))
962 return (const char *)(entry + 1);
963 MAKE_STATS (cache->collision_1st_count++);
964
965 entry = entry->next;
966 if (!entry)
967 return strcache2_enter_string (cache, idx, str, length, hash);
968 if (strcache2_is_equal (cache, entry, str, length, hash))
969 return (const char *)(entry + 1);
970 MAKE_STATS (cache->collision_2nd_count++);
971
972 /* Loop the rest. */
973 for (;;)
974 {
975 entry = entry->next;
976 if (!entry)
977 return strcache2_enter_string (cache, idx, str, length, hash);
978 if (strcache2_is_equal (cache, entry, str, length, hash))
979 return (const char *)(entry + 1);
980 MAKE_STATS (cache->collision_3rd_count++);
981 }
982 /* not reached */
983}
984
985/* The public lookup (case insensitive) string interface. */
986const char *
987strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
988{
989 struct strcache2_entry const *entry;
990 unsigned int hash = strcache2_case_insensitive_hash (str, length);
991 unsigned int idx;
992
993 assert (!cache->case_insensitive);
994
995 MAKE_STATS (cache->lookup_count++);
996
997 /* Lookup the entry in the hash table, hoping for an
998 early match. */
999 idx = STRCACHE2_MOD_IT (cache, hash);
1000 entry = cache->hash_tab[idx];
1001 if (!entry)
1002 return NULL;
1003 if (strcache2_is_equal (cache, entry, str, length, hash))
1004 return (const char *)(entry + 1);
1005 MAKE_STATS (cache->collision_1st_count++);
1006
1007 entry = entry->next;
1008 if (!entry)
1009 return NULL;
1010 if (strcache2_is_equal (cache, entry, str, length, hash))
1011 return (const char *)(entry + 1);
1012 MAKE_STATS (cache->collision_2nd_count++);
1013
1014 /* Loop the rest. */
1015 for (;;)
1016 {
1017 entry = entry->next;
1018 if (!entry)
1019 return NULL;
1020 if (strcache2_is_equal (cache, entry, str, length, hash))
1021 return (const char *)(entry + 1);
1022 MAKE_STATS (cache->collision_3rd_count++);
1023 }
1024 /* not reached */
1025}
1026
1027#endif /* HAVE_CASE_INSENSITIVE_FS */
1028
1029/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1030int
1031strcache2_is_cached (struct strcache2 *cache, const char *str)
1032{
1033 /* Check mandatory alignment first. */
1034 if (!((size_t)str & (sizeof (void *) - 1)))
1035 {
1036 /* Check the segment list and consider the question answered if the
1037 string is within one of them. (Could check it more thoroughly...) */
1038 struct strcache2_seg const *seg;
1039 for (seg = cache->seg_head; seg; seg = seg->next)
1040 if ((size_t)(str - seg->start) < seg->size)
1041 return 1;
1042 }
1043
1044 return 0;
1045}
1046
1047
1048/* Verify the integrity of the specified string, returning 0 if OK. */
1049int
1050strcache2_verify_entry (struct strcache2 *cache, const char *str)
1051{
1052 struct strcache2_entry const *entry;
1053 unsigned hash;
1054 const char *end;
1055
1056 if ((size_t)str & (sizeof (void *) - 1))
1057 {
1058 fprintf (stderr,
1059 "strcache2[%s]: missaligned string %p\nstring: %s\n",
1060 cache->name, (void *)str, str);
1061 return -1;
1062 }
1063
1064 entry = (struct strcache2_entry const *)str - 1;
1065 end = memchr (str, '\0', entry->length);
1066 if ((size_t)(end - str) == entry->length)
1067 {
1068 fprintf (stderr,
1069 "strcache2[%s]: corrupt entry %p, length: %lu, expected %u;\nstring: %s\n",
1070 cache->name, (void *)entry, (unsigned long)(end - str), entry->length, str);
1071 return -1;
1072 }
1073
1074 hash = cache->case_insensitive
1075 ? strcache2_case_insensitive_hash (str, entry->length)
1076 : strcache2_case_sensitive_hash (str, entry->length);
1077 if (hash != entry->hash)
1078 {
1079 fprintf (stderr,
1080 "strcache2[%s]: corrupt entry %p, hash#1: %x, expected %x;\nstring: %s\n",
1081 cache->name, (void *)entry, hash, entry->hash, str);
1082 return -1;
1083 }
1084
1085 return 0;
1086}
1087
1088
1089/* Calculates the case sensitive hash values of the string.
1090 The first hash is returned, the other is put at HASH2P. */
1091unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1092{
1093 *hash2p = 1;
1094 return strcache2_case_sensitive_hash (str, length);
1095}
1096
1097/* Calculates the case insensitive hash values of the string.
1098 The first hash is returned, the other is put at HASH2P. */
1099unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1100{
1101 *hash2p = 1;
1102 return strcache2_case_insensitive_hash (str, length);
1103}
1104
1105
1106
1107/* Initalizes a new cache. */
1108void
1109strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1110 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1111{
1112 unsigned hash_shift;
1113 assert (!thread_safe);
1114
1115 /* calc the size as a power of two */
1116 if (!size)
1117 hash_shift = STRCACHE2_HASH_SHIFT;
1118 else
1119 {
1120 assert (size <= (~0U / 2 + 1));
1121 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1122 /* nothing */;
1123 }
1124
1125 /* adjust the default segment size */
1126 if (!def_seg_size)
1127 def_seg_size = STRCACHE2_SEG_SIZE;
1128 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1129 def_seg_size = sizeof (struct strcache2_seg) * 10;
1130 else if ((def_seg_size & 0xfff) < 0xf00)
1131 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1132
1133
1134 /* init the structure. */
1135 cache->case_insensitive = case_insensitive;
1136#ifdef STRCACHE2_USE_MASK
1137 cache->hash_mask = (1U << hash_shift) - 1U;
1138#else
1139 cache->hash_div = strcache2_find_prime(hash_shift);
1140#endif
1141 cache->count = 0;
1142 cache->collision_count = 0;
1143 cache->lookup_count = 0;
1144 cache->collision_1st_count = 0;
1145 cache->collision_2nd_count = 0;
1146 cache->collision_3rd_count = 0;
1147 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1148 cache->init_size = 1U << hash_shift;
1149 cache->hash_size = 1U << hash_shift;
1150 cache->def_seg_size = def_seg_size;
1151 cache->lock = NULL;
1152 cache->name = name;
1153
1154 /* allocate the hash table and first segment. */
1155 cache->hash_tab = (struct strcache2_entry **)
1156 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1157 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1158 strcache2_new_seg (cache, 0);
1159
1160 /* link it */
1161 cache->next = strcache_head;
1162 strcache_head = cache;
1163}
1164
1165
1166/* Terminates a string cache, freeing all memory and other
1167 associated resources. */
1168void
1169strcache2_term (struct strcache2 *cache)
1170{
1171 /* unlink it */
1172 if (strcache_head == cache)
1173 strcache_head = cache->next;
1174 else
1175 {
1176 struct strcache2 *prev = strcache_head;
1177 while (prev->next != cache)
1178 prev = prev->next;
1179 assert (prev);
1180 prev->next = cache->next;
1181 }
1182
1183 /* free the memory segments */
1184 do
1185 {
1186 void *free_it = cache->seg_head;
1187 cache->seg_head = cache->seg_head->next;
1188 free (free_it);
1189 }
1190 while (cache->seg_head);
1191
1192 /* free the hash and clear the structure. */
1193 free (cache->hash_tab);
1194 memset (cache, '\0', sizeof (struct strcache2));
1195}
1196
1197/* Print statistics a string cache. */
1198void
1199strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1200{
1201 unsigned int seg_count = 0;
1202 unsigned long seg_total_bytes = 0;
1203 unsigned long seg_avg_bytes;
1204 unsigned long seg_avail_bytes = 0;
1205 unsigned long seg_max_bytes = 0;
1206 struct strcache2_seg *seg;
1207 unsigned int str_count = 0;
1208 unsigned long str_total_len = 0;
1209 unsigned int str_avg_len;
1210 unsigned int str_min_len = ~0U;
1211 unsigned int str_max_len = 0;
1212 unsigned int idx;
1213 unsigned int rehashes;
1214 unsigned int chain_depths[32];
1215
1216 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1217
1218 /* Segment statistics. */
1219 for (seg = cache->seg_head; seg; seg = seg->next)
1220 {
1221 seg_count++;
1222 seg_total_bytes += seg->size;
1223 seg_avail_bytes += seg->avail;
1224 if (seg->size > seg_max_bytes)
1225 seg_max_bytes = seg->size;
1226 }
1227 seg_avg_bytes = seg_total_bytes / seg_count;
1228 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1229 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1230 cache->def_seg_size, seg_avail_bytes);
1231
1232 /* String statistics. */
1233 memset (chain_depths, '\0', sizeof (chain_depths));
1234 idx = cache->hash_size;
1235 while (idx-- > 0)
1236 {
1237 struct strcache2_entry const *entry = cache->hash_tab[idx];
1238 unsigned int depth = 0;
1239 for (; entry != 0; entry = entry->next, depth++)
1240 {
1241 unsigned int length = entry->length;
1242 str_total_len += length;
1243 if (length > str_max_len)
1244 str_max_len = length;
1245 if (length < str_min_len)
1246 str_min_len = length;
1247 str_count++;
1248 }
1249 chain_depths[depth >= 32 ? 31 : depth]++;
1250 }
1251 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1252 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1253 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1254 if (str_count != cache->count)
1255 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1256 cache->count, str_count);
1257
1258 /* Hash statistics. */
1259 idx = cache->init_size;
1260 rehashes = 0;
1261 while (idx < cache->hash_size)
1262 {
1263 idx *= 2;
1264 rehashes++;
1265 }
1266
1267#ifdef STRCACHE2_USE_MASK
1268 printf (_("%s hash size = %u mask = %#x rehashed %u times lookups = %lu\n"),
1269 prefix, cache->hash_size, cache->hash_mask, rehashes, cache->lookup_count);
1270#else
1271 printf (_("%s hash size = %u div = %#x rehashed %u times lookups = %lu\n"),
1272 prefix, cache->hash_size, cache->hash_div, rehashes, cache->lookup_count);
1273#endif
1274 if (cache->lookup_count)
1275 printf (_("%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)\n"),
1276 prefix,
1277 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1278 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1279 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1280 printf (_("%s hash insert collisions = %u (%u%%)\n"),
1281 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1282 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1283 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1284 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1285 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1286 for (idx = 2; idx < 32; idx++)
1287 {
1288 unsigned strs_at_this_depth = chain_depths[idx];
1289 unsigned i;
1290 for (i = idx + 1; i < 32; i++)
1291 strs_at_this_depth += chain_depths[i];
1292 if (strs_at_this_depth)
1293 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1294 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1295 idx - 1, idx == 2 ? " " : "s",
1296 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1297 }
1298}
1299
1300/* Print statistics for all string caches. */
1301void
1302strcache2_print_stats_all (const char *prefix)
1303{
1304 struct strcache2 *cur;
1305 for (cur = strcache_head; cur; cur = cur->next)
1306 strcache2_print_stats (cur, prefix);
1307}
1308
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette