VirtualBox

source: kBuild/trunk/src/kmk/strcache2.c@ 2012

Last change on this file since 2012 was 1944, checked in by bird, 16 years ago

strcache2.c: assert input string lengths; fix checks in case insensitive functions (inverted).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 38.5 KB
Line 
1/* $Id: strcache2.c 1944 2008-10-26 02:37:24Z bird $ */
2/** @file
3 * strcache2 - New string cache.
4 */
5
6/*
7 * Copyright (c) 2008 knut st. osmundsen <[email protected]>
8 *
9 * This file is part of kBuild.
10 *
11 * kBuild is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * kBuild is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with kBuild; if not, write to the Free Software
23 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 *
25 */
26
27
28/*******************************************************************************
29* Header Files *
30*******************************************************************************/
31#include "make.h"
32#include "strcache2.h"
33
34#include <assert.h>
35
36#include "debug.h"
37
38#ifdef _MSC_VER
39typedef unsigned char uint8_t;
40typedef unsigned short uint16_t;
41typedef unsigned int uint32_t;
42typedef signed char int8_t;
43typedef signed short int16_t;
44typedef signed int int32_t;
45#else
46# include <stdint.h>
47#endif
48
49#ifdef WINDOWS32
50# include <io.h>
51# include <process.h>
52# include <Windows.h>
53# define PARSE_IN_WORKER
54#endif
55
56#ifdef __OS2__
57# include <sys/fmutex.h>
58#endif
59
60#ifdef HAVE_PTHREAD
61# include <pthread.h>
62#endif
63
64
65/*******************************************************************************
66* Defined Constants And Macros *
67*******************************************************************************/
68/* The default size of a memory segment (1MB). */
69#define STRCACHE2_SEG_SIZE (1024U*1024U)
70/* The default hash table shift (hash size give as a power of two). */
71#define STRCACHE2_HASH_SHIFT 16
72/** Does the modding / masking of a hash number into an index. */
73#ifdef STRCACHE2_USE_MASK
74# define STRCACHE2_MOD_IT(cache, hash) ((hash) & (cache)->hash_mask)
75#else
76# define STRCACHE2_MOD_IT(cache, hash) ((hash) % (cache)->hash_div)
77#endif
78
79# if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
80 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
81# define strcache2_get_unaligned_16bits(ptr) ( *((const uint16_t *)(ptr)))
82# else
83 /* (endian doesn't matter) */
84# define strcache2_get_unaligned_16bits(ptr) ( (((const uint8_t *)(ptr))[0] << 8) \
85 | (((const uint8_t *)(ptr))[1]) )
86# endif
87
88
89/*******************************************************************************
90* Global Variables *
91*******************************************************************************/
92/* List of initialized string caches. */
93static struct strcache2 *strcache_head;
94
95
96/** Finds the closest primary number for power of two value (or something else
97 * useful if not support). */
98MY_INLINE unsigned int strcache2_find_prime(unsigned int shift)
99{
100 switch (shift)
101 {
102 case 5: return 31;
103 case 6: return 61;
104 case 7: return 127;
105 case 8: return 251;
106 case 9: return 509;
107 case 10: return 1021;
108 case 11: return 2039;
109 case 12: return 4093;
110 case 13: return 8191;
111 case 14: return 16381;
112 case 15: return 32749;
113 case 16: return 65521;
114 case 17: return 131063;
115 case 18: return 262139;
116 case 19: return 524269;
117 case 20: return 1048573;
118 case 21: return 2097143;
119 case 22: return 4194301;
120 case 23: return 8388593;
121
122 default:
123 assert (0);
124 return (1 << shift) - 1;
125 }
126}
127
128/* The following is a bit experiment. It produces longer chains, i.e. worse
129 distribution of the strings in the table, however the actual make
130 performances is better (<time). The explanation is probably that the
131 collisions only really increase for entries that aren't looked up that
132 much and that it actually improoves the situation for those that is. Or
133 that we spend so much less time hashing that it makes up (and more) for
134 the pentalty we suffer from the longer chains and worse distribution.
135
136 XXX: Check how this works out with different path lengths. I suspect it
137 might depend on the length of PATH_ROOT and the depth of the files
138 in the project as well. If it does, this might make matters worse
139 for some and better for others which isn't very cool... */
140
141#if 0
142# define BIG_HASH_SIZE 32 /* kinda fast */
143# define BIG_HASH_HEAD 16
144# define BIG_HASH_TAIL 12
145#elif 0
146# define BIG_HASH_SIZE 68 /* kinda safe */
147# define BIG_HASH_HEAD 24
148# define BIG_HASH_TAIL 24
149#elif 0
150# define BIG_HASH_SIZE 128 /* safe */
151# define BIG_HASH_HEAD 32
152# define BIG_HASH_TAIL 32
153#endif
154
155#ifdef BIG_HASH_SIZE
156/* long string: hash head and tail, drop the middle. */
157MY_INLINE unsigned int
158strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
159{
160 uint32_t hash = len;
161 uint32_t tmp;
162 unsigned int head;
163
164 /* head BIG_HASH_HEAD bytes */
165 head = (BIG_HASH_HEAD >> 2);
166 while (head-- > 0)
167 {
168 hash += strcache2_get_unaligned_16bits (str);
169 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
170 hash = (hash << 16) ^ tmp;
171 str += 2 * sizeof (uint16_t);
172 hash += hash >> 11;
173 }
174
175 /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
176 str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
177 head = (BIG_HASH_TAIL >> 2);
178 while (head-- > 0)
179 {
180 hash += strcache2_get_unaligned_16bits (str);
181 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
182 hash = (hash << 16) ^ tmp;
183 str += 2 * sizeof (uint16_t);
184 hash += hash >> 11;
185 }
186
187 /* force "avalanching" of final 127 bits. */
188 hash ^= hash << 3;
189 hash += hash >> 5;
190 hash ^= hash << 4;
191 hash += hash >> 17;
192 hash ^= hash << 25;
193 hash += hash >> 6;
194
195 return hash;
196}
197#endif /* BIG_HASH_SIZE */
198
199MY_INLINE unsigned int
200strcache2_case_sensitive_hash (const char *str, unsigned int len)
201{
202#if 1
203 /* Paul Hsieh hash SuperFast function:
204 http://www.azillionmonkeys.com/qed/hash.html
205
206 This performs very good and as a sligtly better distribution than
207 STRING_N_HASH_1 on a typical kBuild run.
208
209 It is also 37% faster than return_STRING_N_HASH_1 when running the
210 two 100 times over typical kBuild strings that end up here (did a
211 fprintf here and built kBuild). Compiler was 32-bit gcc 4.0.1, darwin,
212 with -O2.
213
214 FIXME: A path for well aligned data should be added to speed up
215 execution on alignment sensitive systems. */
216 unsigned int rem;
217 uint32_t hash;
218 uint32_t tmp;
219
220 assert (sizeof (uint8_t) == sizeof (char));
221
222# ifdef BIG_HASH_SIZE
223 /* long string? */
224# if 0 /*BIG_HASH_SIZE > 128*/
225 if (MY_PREDICT_FALSE(len >= BIG_HASH_SIZE))
226# else
227 if (len >= BIG_HASH_SIZE)
228# endif
229 return strcache2_case_sensitive_hash_big (str, len);
230# endif
231
232 /* short string: main loop, walking on 2 x uint16_t */
233 hash = len;
234 rem = len & 3;
235 len >>= 2;
236 while (len > 0)
237 {
238 hash += strcache2_get_unaligned_16bits (str);
239 tmp = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
240 hash = (hash << 16) ^ tmp;
241 str += 2 * sizeof (uint16_t);
242 hash += hash >> 11;
243 len--;
244 }
245
246 /* the remainder */
247 switch (rem)
248 {
249 case 3:
250 hash += strcache2_get_unaligned_16bits (str);
251 hash ^= hash << 16;
252 hash ^= str[sizeof (uint16_t)] << 18;
253 hash += hash >> 11;
254 break;
255 case 2:
256 hash += strcache2_get_unaligned_16bits (str);
257 hash ^= hash << 11;
258 hash += hash >> 17;
259 break;
260 case 1:
261 hash += *str;
262 hash ^= hash << 10;
263 hash += hash >> 1;
264 break;
265 }
266
267 /* force "avalanching" of final 127 bits. */
268 hash ^= hash << 3;
269 hash += hash >> 5;
270 hash ^= hash << 4;
271 hash += hash >> 17;
272 hash ^= hash << 25;
273 hash += hash >> 6;
274
275 return hash;
276
277#elif 1
278 /* Note! This implementation is 18% faster than return_STRING_N_HASH_1
279 when running the two 100 times over typical kBuild strings that
280 end up here (did a fprintf here and built kBuild).
281 Compiler was 32-bit gcc 4.0.1, darwin, with -O2. */
282
283 unsigned int hash = 0;
284 if (MY_PREDICT_TRUE(len >= 2))
285 {
286 unsigned int ch0 = *str++;
287 hash = 0;
288 len--;
289 while (len >= 2)
290 {
291 unsigned int ch1 = *str++;
292 hash += ch0 << (ch1 & 0xf);
293
294 ch0 = *str++;
295 hash += ch1 << (ch0 & 0xf);
296
297 len -= 2;
298 }
299 if (len == 1)
300 {
301 unsigned ch1 = *str;
302 hash += ch0 << (ch1 & 0xf);
303
304 hash += ch1;
305 }
306 else
307 hash += ch0;
308 }
309 else if (len)
310 {
311 hash = *str;
312 hash += hash << (hash & 0xf);
313 }
314 else
315 hash = 0;
316 return hash;
317
318#elif 1
319# if 0
320 /* This is SDBM. This specific form/unroll was benchmarked to be 28% faster
321 than return_STRING_N_HASH_1. (Now the weird thing is that putting the (ch)
322 first in the assignment made it noticably slower.)
323
324 However, it is noticably slower in practice, most likely because of more
325 collisions. Hrmpf. */
326
327# define UPDATE_HASH(ch) hash = (hash << 6) + (hash << 16) - hash + (ch)
328 unsigned int hash = 0;
329
330# else
331 /* This is DJB2. This specific form/unroll was benchmarked to be 27%
332 fast than return_STRING_N_HASH_1.
333
334 Ditto. */
335
336# define UPDATE_HASH(ch) hash = (hash << 5) + hash + (ch)
337 unsigned int hash = 5381;
338# endif
339
340
341 while (len >= 4)
342 {
343 UPDATE_HASH (str[0]);
344 UPDATE_HASH (str[1]);
345 UPDATE_HASH (str[2]);
346 UPDATE_HASH (str[3]);
347 str += 4;
348 len -= 4;
349 }
350 switch (len)
351 {
352 default:
353 case 0:
354 return hash;
355 case 1:
356 UPDATE_HASH (str[0]);
357 return hash;
358 case 2:
359 UPDATE_HASH (str[0]);
360 UPDATE_HASH (str[1]);
361 return hash;
362 case 3:
363 UPDATE_HASH (str[0]);
364 UPDATE_HASH (str[1]);
365 UPDATE_HASH (str[2]);
366 return hash;
367 }
368#endif
369}
370
371MY_INLINE unsigned int
372strcache2_case_insensitive_hash (const char *str, unsigned int len)
373{
374 unsigned int hash = 0;
375 if (MY_PREDICT_TRUE(len >= 2))
376 {
377 unsigned int ch0 = *str++;
378 ch0 = tolower (ch0);
379 hash = 0;
380 len--;
381 while (len >= 2)
382 {
383 unsigned int ch1 = *str++;
384 ch1 = tolower (ch1);
385 hash += ch0 << (ch1 & 0xf);
386
387 ch0 = *str++;
388 ch0 = tolower (ch0);
389 hash += ch1 << (ch0 & 0xf);
390
391 len -= 2;
392 }
393 if (len == 1)
394 {
395 unsigned ch1 = *str;
396 ch1 = tolower (ch1);
397 hash += ch0 << (ch1 & 0xf);
398
399 hash += ch1;
400 }
401 else
402 hash += ch0;
403 }
404 else if (len)
405 {
406 hash = *str;
407 hash += hash << (hash & 0xf);
408 }
409 else
410 hash = 0;
411 return hash;
412}
413
414MY_INLINE int
415strcache2_memcmp_inline_short (const char *xs, const char *ys, unsigned int length)
416{
417 if (length <= 8)
418 {
419 /* short string compare - ~50% of the kBuild calls. */
420 assert ( !((size_t)ys & 3) );
421 if (!((size_t)xs & 3))
422 {
423 /* aligned */
424 int result;
425 switch (length)
426 {
427 default: /* memcmp for longer strings */
428 return memcmp (xs, ys, length);
429 case 8:
430 result = *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
431 result |= *(int32_t*)xs - *(int32_t*)ys;
432 return result;
433 case 7:
434 result = xs[6] - ys[6];
435 result |= xs[5] - ys[5];
436 result |= xs[4] - ys[4];
437 result |= *(int32_t*)xs - *(int32_t*)ys;
438 return result;
439 case 6:
440 result = xs[5] - ys[5];
441 result |= xs[4] - ys[4];
442 result |= *(int32_t*)xs - *(int32_t*)ys;
443 return result;
444 case 5:
445 result = xs[4] - ys[4];
446 result |= *(int32_t*)xs - *(int32_t*)ys;
447 return result;
448 case 4:
449 return *(int32_t*)xs - *(int32_t*)ys;
450 case 3:
451 result = xs[2] - ys[2];
452 result |= xs[1] - ys[1];
453 result |= xs[0] - ys[0];
454 return result;
455 case 2:
456 result = xs[1] - ys[1];
457 result |= xs[0] - ys[0];
458 return result;
459 case 1:
460 return *xs - *ys;
461 case 0:
462 return 0;
463 }
464 }
465 else
466 {
467 /* unaligned */
468 int result = 0;
469 switch (length)
470 {
471 case 8: result |= xs[7] - ys[7];
472 case 7: result |= xs[6] - ys[6];
473 case 6: result |= xs[5] - ys[5];
474 case 5: result |= xs[4] - ys[4];
475 case 4: result |= xs[3] - ys[3];
476 case 3: result |= xs[2] - ys[2];
477 case 2: result |= xs[1] - ys[1];
478 case 1: result |= xs[0] - ys[0];
479 case 0:
480 return result;
481 }
482 }
483 }
484
485 /* memcmp for longer strings */
486 return memcmp (xs, ys, length);
487}
488
489MY_INLINE int
490strcache2_memcmp_inlined (const char *xs, const char *ys, unsigned int length)
491{
492#ifndef ELECTRIC_HEAP
493 assert ( !((size_t)ys & 3) );
494#endif
495 if (!((size_t)xs & 3))
496 {
497 int result;
498 /* aligned */
499 while (length >= 8)
500 {
501 result = *(int32_t*)xs - *(int32_t*)ys;
502 result |= *(int32_t*)(xs + 4) - *(int32_t*)(ys + 4);
503 if (MY_PREDICT_FALSE(result))
504 return result;
505 xs += 8;
506 ys += 8;
507 length -= 8;
508 }
509 switch (length)
510 {
511 case 7:
512 result = *(int32_t*)xs - *(int32_t*)ys;
513 result |= xs[6] - ys[6];
514 result |= xs[5] - ys[5];
515 result |= xs[4] - ys[4];
516 return result;
517 case 6:
518 result = *(int32_t*)xs - *(int32_t*)ys;
519 result |= xs[5] - ys[5];
520 result |= xs[4] - ys[4];
521 return result;
522 case 5:
523 result = *(int32_t*)xs - *(int32_t*)ys;
524 result |= xs[4] - ys[4];
525 return result;
526 case 4:
527 return *(int32_t*)xs - *(int32_t*)ys;
528 case 3:
529 result = xs[2] - ys[2];
530 result |= xs[1] - ys[1];
531 result |= xs[0] - ys[0];
532 return result;
533 case 2:
534 result = xs[1] - ys[1];
535 result |= xs[0] - ys[0];
536 return result;
537 case 1:
538 return *xs - *ys;
539 default:
540 case 0:
541 return 0;
542 }
543 }
544 else
545 {
546 /* unaligned */
547 int result;
548 while (length >= 8)
549 {
550#if defined(__i386__) || defined(__x86_64__)
551 result = ( ((int32_t)xs[3] << 24)
552 | ((int32_t)xs[2] << 16)
553 | ((int32_t)xs[1] << 8)
554 | xs[0] )
555 - *(int32_t*)ys;
556 result |= ( ((int32_t)xs[7] << 24)
557 | ((int32_t)xs[6] << 16)
558 | ((int32_t)xs[5] << 8)
559 | xs[4] )
560 - *(int32_t*)(ys + 4);
561#else
562 result = xs[3] - ys[3];
563 result |= xs[2] - ys[2];
564 result |= xs[1] - ys[1];
565 result |= xs[0] - ys[0];
566 result |= xs[7] - ys[7];
567 result |= xs[6] - ys[6];
568 result |= xs[5] - ys[5];
569 result |= xs[4] - ys[4];
570#endif
571 if (MY_PREDICT_FALSE(result))
572 return result;
573 xs += 8;
574 ys += 8;
575 length -= 8;
576 }
577 result = 0;
578 switch (length)
579 {
580 case 7: result |= xs[6] - ys[6];
581 case 6: result |= xs[5] - ys[5];
582 case 5: result |= xs[4] - ys[4];
583 case 4: result |= xs[3] - ys[3];
584 case 3: result |= xs[2] - ys[2];
585 case 2: result |= xs[1] - ys[1];
586 case 1: result |= xs[0] - ys[0];
587 return result;
588 default:
589 case 0:
590 return 0;
591 }
592 }
593}
594
595MY_INLINE int
596strcache2_is_equal (struct strcache2 *cache, struct strcache2_entry const *entry,
597 const char *str, unsigned int length, unsigned int hash)
598{
599 assert (!cache->case_insensitive);
600
601 /* the simple stuff first. */
602 if ( entry->hash != hash
603 || entry->length != length)
604 return 0;
605
606#if 0
607 return memcmp (str, entry + 1, length) == 0;
608#elif 1
609 return strcache2_memcmp_inlined (str, (const char *)(entry + 1), length) == 0;
610#else
611 return strcache2_memcmp_inline_short (str, (const char *)(entry + 1), length) == 0;
612#endif
613}
614
615MY_INLINE int
616strcache2_is_iequal (struct strcache2 *cache, struct strcache2_entry const *entry,
617 const char *str, unsigned int length, unsigned int hash)
618{
619 assert (cache->case_insensitive);
620
621 /* the simple stuff first. */
622 if ( entry->hash != hash
623 || entry->length != length)
624 return 0;
625
626#if defined(_MSC_VER) || defined(__OS2__)
627 return _memicmp (entry + 1, str, length) == 0;
628#else
629 return strncasecmp ((const char *)(entry + 1), str, length) == 0;
630#endif
631}
632
633static void
634strcache2_rehash (struct strcache2 *cache)
635{
636 unsigned int src = cache->hash_size;
637 struct strcache2_entry **src_tab = cache->hash_tab;
638 struct strcache2_entry **dst_tab;
639#ifndef STRCACHE2_USE_MASK
640 unsigned int hash_shift;
641#endif
642
643 /* Allocate a new hash table twice the size of the current. */
644 cache->hash_size <<= 1;
645#ifdef STRCACHE2_USE_MASK
646 cache->hash_mask <<= 1;
647 cache->hash_mask |= 1;
648#else
649 for (hash_shift = 1; (1U << hash_shift) < cache->hash_size; hash_shift++)
650 /* nothing */;
651 cache->hash_div = strcache2_find_prime (hash_shift);
652#endif
653 cache->rehash_count <<= 1;
654 cache->hash_tab = dst_tab = (struct strcache2_entry **)
655 xmalloc (cache->hash_size * sizeof (struct strcache2_entry *));
656 memset (dst_tab, '\0', cache->hash_size * sizeof (struct strcache2_entry *));
657
658 /* Copy the entries from the old to the new hash table. */
659 cache->collision_count = 0;
660 while (src-- > 0)
661 {
662 struct strcache2_entry *entry = src_tab[src];
663 while (entry)
664 {
665 struct strcache2_entry *next = entry->next;
666 unsigned int dst = STRCACHE2_MOD_IT (cache, entry->hash);
667 if ((entry->next = dst_tab[dst]) != 0)
668 cache->collision_count++;
669 dst_tab[dst] = entry;
670
671 entry = next;
672 }
673 }
674
675 /* That's it, just free the old table and we're done. */
676 free (src_tab);
677}
678
679static struct strcache2_seg *
680strcache2_new_seg (struct strcache2 *cache, unsigned int minlen)
681{
682 struct strcache2_seg *seg;
683 size_t size;
684 size_t off;
685
686 size = cache->def_seg_size;
687 if (size < (size_t)minlen + sizeof (struct strcache2_seg) + STRCACHE2_ENTRY_ALIGNMENT)
688 {
689 size = (size_t)minlen * 2;
690 size = (size + 0xfff) & ~(size_t)0xfff;
691 }
692
693 seg = xmalloc (size);
694 seg->start = (char *)(seg + 1);
695 seg->size = size - sizeof (struct strcache2_seg);
696 off = (size_t)seg->start & (STRCACHE2_ENTRY_ALIGNMENT - 1);
697 if (off)
698 {
699 off = STRCACHE2_ENTRY_ALIGNMENT - off;
700 seg->start += off;
701 seg->size -= off;
702 }
703 assert (seg->size > minlen);
704 seg->cursor = seg->start;
705 seg->avail = seg->size;
706
707 seg->next = cache->seg_head;
708 cache->seg_head = seg;
709
710 return seg;
711}
712
713/* Internal worker that enters a new string into the cache. */
714static const char *
715strcache2_enter_string (struct strcache2 *cache, unsigned int idx,
716 const char *str, unsigned int length,
717 unsigned int hash)
718{
719 struct strcache2_entry *entry;
720 struct strcache2_seg *seg;
721 unsigned int size;
722 char *str_copy;
723
724 /* Allocate space for the string. */
725
726 size = length + 1 + sizeof (struct strcache2_entry);
727 size = (size + STRCACHE2_ENTRY_ALIGNMENT - 1) & ~(STRCACHE2_ENTRY_ALIGNMENT - 1U);
728
729 seg = cache->seg_head;
730 if (MY_PREDICT_FALSE(seg->avail < size))
731 {
732 do
733 seg = seg->next;
734 while (seg && seg->avail < size);
735 if (!seg)
736 seg = strcache2_new_seg (cache, size);
737 }
738
739 entry = (struct strcache2_entry *) seg->cursor;
740 assert (!((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1)));
741 seg->cursor += size;
742 seg->avail -= size;
743
744 /* Setup the entry, copy the string and insert it into the hash table. */
745
746 entry->user = NULL;
747 entry->length = length;
748 entry->hash = hash;
749 str_copy = (char *) memcpy (entry + 1, str, length);
750 str_copy[length] = '\0';
751
752 if ((entry->next = cache->hash_tab[idx]) != 0)
753 cache->collision_count++;
754 cache->hash_tab[idx] = entry;
755 cache->count++;
756 if (cache->count >= cache->rehash_count)
757 strcache2_rehash (cache);
758
759 return str_copy;
760}
761
762/* The public add string interface. */
763const char *
764strcache2_add (struct strcache2 *cache, const char *str, unsigned int length)
765{
766 struct strcache2_entry const *entry;
767 unsigned int hash = strcache2_case_sensitive_hash (str, length);
768 unsigned int idx;
769
770 assert (!cache->case_insensitive);
771 assert (!memchr (str, '\0', length));
772
773 MAKE_STATS (cache->lookup_count++);
774
775 /* Lookup the entry in the hash table, hoping for an
776 early match. If not found, enter the string at IDX. */
777 idx = STRCACHE2_MOD_IT (cache, hash);
778 entry = cache->hash_tab[idx];
779 if (!entry)
780 return strcache2_enter_string (cache, idx, str, length, hash);
781 if (strcache2_is_equal (cache, entry, str, length, hash))
782 return (const char *)(entry + 1);
783 MAKE_STATS (cache->collision_1st_count++);
784
785 entry = entry->next;
786 if (!entry)
787 return strcache2_enter_string (cache, idx, str, length, hash);
788 if (strcache2_is_equal (cache, entry, str, length, hash))
789 return (const char *)(entry + 1);
790 MAKE_STATS (cache->collision_2nd_count++);
791
792 /* Loop the rest. */
793 for (;;)
794 {
795 entry = entry->next;
796 if (!entry)
797 return strcache2_enter_string (cache, idx, str, length, hash);
798 if (strcache2_is_equal (cache, entry, str, length, hash))
799 return (const char *)(entry + 1);
800 MAKE_STATS (cache->collision_3rd_count++);
801 }
802 /* not reached */
803}
804
805/* The public add string interface for prehashed strings.
806 Use strcache2_hash_str to calculate the hash of a string. */
807const char *
808strcache2_add_hashed (struct strcache2 *cache, const char *str,
809 unsigned int length, unsigned int hash)
810{
811 struct strcache2_entry const *entry;
812 unsigned int idx;
813#ifndef NDEBUG
814 unsigned correct_hash;
815
816 assert (!cache->case_insensitive);
817 assert (!memchr (str, '\0', length));
818 correct_hash = strcache2_case_sensitive_hash (str, length);
819 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
820#endif /* NDEBUG */
821
822 MAKE_STATS (cache->lookup_count++);
823
824 /* Lookup the entry in the hash table, hoping for an
825 early match. If not found, enter the string at IDX. */
826 idx = STRCACHE2_MOD_IT (cache, hash);
827 entry = cache->hash_tab[idx];
828 if (!entry)
829 return strcache2_enter_string (cache, idx, str, length, hash);
830 if (strcache2_is_equal (cache, entry, str, length, hash))
831 return (const char *)(entry + 1);
832 MAKE_STATS (cache->collision_1st_count++);
833
834 entry = entry->next;
835 if (!entry)
836 return strcache2_enter_string (cache, idx, str, length, hash);
837 if (strcache2_is_equal (cache, entry, str, length, hash))
838 return (const char *)(entry + 1);
839 MAKE_STATS (cache->collision_2nd_count++);
840
841 /* Loop the rest. */
842 for (;;)
843 {
844 entry = entry->next;
845 if (!entry)
846 return strcache2_enter_string (cache, idx, str, length, hash);
847 if (strcache2_is_equal (cache, entry, str, length, hash))
848 return (const char *)(entry + 1);
849 MAKE_STATS (cache->collision_3rd_count++);
850 }
851 /* not reached */
852}
853
854/* The public lookup (case sensitive) string interface. */
855const char *
856strcache2_lookup (struct strcache2 *cache, const char *str, unsigned int length)
857{
858 struct strcache2_entry const *entry;
859 unsigned int hash = strcache2_case_sensitive_hash (str, length);
860 unsigned int idx;
861
862 assert (!cache->case_insensitive);
863 assert (!memchr (str, '\0', length));
864
865 MAKE_STATS (cache->lookup_count++);
866
867 /* Lookup the entry in the hash table, hoping for an
868 early match. */
869 idx = STRCACHE2_MOD_IT (cache, hash);
870 entry = cache->hash_tab[idx];
871 if (!entry)
872 return NULL;
873 if (strcache2_is_equal (cache, entry, str, length, hash))
874 return (const char *)(entry + 1);
875 MAKE_STATS (cache->collision_1st_count++);
876
877 entry = entry->next;
878 if (!entry)
879 return NULL;
880 if (strcache2_is_equal (cache, entry, str, length, hash))
881 return (const char *)(entry + 1);
882 MAKE_STATS (cache->collision_2nd_count++);
883
884 /* Loop the rest. */
885 for (;;)
886 {
887 entry = entry->next;
888 if (!entry)
889 return NULL;
890 if (strcache2_is_equal (cache, entry, str, length, hash))
891 return (const char *)(entry + 1);
892 MAKE_STATS (cache->collision_3rd_count++);
893 }
894 /* not reached */
895}
896
897#if defined(HAVE_CASE_INSENSITIVE_FS)
898
899/* The public add string interface for case insensitive strings. */
900const char *
901strcache2_iadd (struct strcache2 *cache, const char *str, unsigned int length)
902{
903 struct strcache2_entry const *entry;
904 unsigned int hash = strcache2_case_insensitive_hash (str, length);
905 unsigned int idx;
906
907 assert (cache->case_insensitive);
908 assert (!memchr (str, '\0', length));
909
910 MAKE_STATS (cache->lookup_count++);
911
912 /* Lookup the entry in the hash table, hoping for an
913 early match. If not found, enter the string at IDX. */
914 idx = STRCACHE2_MOD_IT (cache, hash);
915 entry = cache->hash_tab[idx];
916 if (!entry)
917 return strcache2_enter_string (cache, idx, str, length, hash);
918 if (strcache2_is_equal (cache, entry, str, length, hash))
919 return (const char *)(entry + 1);
920 MAKE_STATS (cache->collision_1st_count++);
921
922 entry = entry->next;
923 if (!entry)
924 return strcache2_enter_string (cache, idx, str, length, hash);
925 if (strcache2_is_equal (cache, entry, str, length, hash))
926 return (const char *)(entry + 1);
927 MAKE_STATS (cache->collision_2nd_count++);
928
929 /* Loop the rest. */
930 for (;;)
931 {
932 entry = entry->next;
933 if (!entry)
934 return strcache2_enter_string (cache, idx, str, length, hash);
935 if (strcache2_is_equal (cache, entry, str, length, hash))
936 return (const char *)(entry + 1);
937 MAKE_STATS (cache->collision_3rd_count++);
938 }
939 /* not reached */
940}
941
942/* The public add string interface for prehashed case insensitive strings.
943 Use strcache2_hash_istr to calculate the hash of a string. */
944const char *
945strcache2_iadd_hashed (struct strcache2 *cache, const char *str,
946 unsigned int length, unsigned int hash)
947{
948 struct strcache2_entry const *entry;
949 unsigned int idx;
950#ifndef NDEBUG
951 unsigned correct_hash;
952
953 assert (cache->case_insensitive);
954 assert (!memchr (str, '\0', length));
955 correct_hash = strcache2_case_insensitive_hash (str, length);
956 MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
957#endif /* NDEBUG */
958
959 MAKE_STATS (cache->lookup_count++);
960
961 /* Lookup the entry in the hash table, hoping for an
962 early match. If not found, enter the string at IDX. */
963 idx = STRCACHE2_MOD_IT (cache, hash);
964 entry = cache->hash_tab[idx];
965 if (!entry)
966 return strcache2_enter_string (cache, idx, str, length, hash);
967 if (strcache2_is_equal (cache, entry, str, length, hash))
968 return (const char *)(entry + 1);
969 MAKE_STATS (cache->collision_1st_count++);
970
971 entry = entry->next;
972 if (!entry)
973 return strcache2_enter_string (cache, idx, str, length, hash);
974 if (strcache2_is_equal (cache, entry, str, length, hash))
975 return (const char *)(entry + 1);
976 MAKE_STATS (cache->collision_2nd_count++);
977
978 /* Loop the rest. */
979 for (;;)
980 {
981 entry = entry->next;
982 if (!entry)
983 return strcache2_enter_string (cache, idx, str, length, hash);
984 if (strcache2_is_equal (cache, entry, str, length, hash))
985 return (const char *)(entry + 1);
986 MAKE_STATS (cache->collision_3rd_count++);
987 }
988 /* not reached */
989}
990
991/* The public lookup (case insensitive) string interface. */
992const char *
993strcache2_ilookup (struct strcache2 *cache, const char *str, unsigned int length)
994{
995 struct strcache2_entry const *entry;
996 unsigned int hash = strcache2_case_insensitive_hash (str, length);
997 unsigned int idx;
998
999 assert (cache->case_insensitive);
1000 assert (!memchr (str, '\0', length));
1001
1002 MAKE_STATS (cache->lookup_count++);
1003
1004 /* Lookup the entry in the hash table, hoping for an
1005 early match. */
1006 idx = STRCACHE2_MOD_IT (cache, hash);
1007 entry = cache->hash_tab[idx];
1008 if (!entry)
1009 return NULL;
1010 if (strcache2_is_equal (cache, entry, str, length, hash))
1011 return (const char *)(entry + 1);
1012 MAKE_STATS (cache->collision_1st_count++);
1013
1014 entry = entry->next;
1015 if (!entry)
1016 return NULL;
1017 if (strcache2_is_equal (cache, entry, str, length, hash))
1018 return (const char *)(entry + 1);
1019 MAKE_STATS (cache->collision_2nd_count++);
1020
1021 /* Loop the rest. */
1022 for (;;)
1023 {
1024 entry = entry->next;
1025 if (!entry)
1026 return NULL;
1027 if (strcache2_is_equal (cache, entry, str, length, hash))
1028 return (const char *)(entry + 1);
1029 MAKE_STATS (cache->collision_3rd_count++);
1030 }
1031 /* not reached */
1032}
1033
1034#endif /* HAVE_CASE_INSENSITIVE_FS */
1035
1036/* Is the given string cached? returns 1 if it is, 0 if it isn't. */
1037int
1038strcache2_is_cached (struct strcache2 *cache, const char *str)
1039{
1040 /* Check mandatory alignment first. */
1041 if (!((size_t)str & (sizeof (void *) - 1)))
1042 {
1043 /* Check the segment list and consider the question answered if the
1044 string is within one of them. (Could check it more thoroughly...) */
1045 struct strcache2_seg const *seg;
1046 for (seg = cache->seg_head; seg; seg = seg->next)
1047 if ((size_t)(str - seg->start) < seg->size)
1048 return 1;
1049 }
1050
1051 return 0;
1052}
1053
1054
1055/* Verify the integrity of the specified string, returning 0 if OK. */
1056int
1057strcache2_verify_entry (struct strcache2 *cache, const char *str)
1058{
1059 struct strcache2_entry const *entry;
1060 unsigned int hash;
1061 unsigned int length;
1062 const char *end;
1063
1064 entry = (struct strcache2_entry const *)str - 1;
1065 if ((size_t)entry & (STRCACHE2_ENTRY_ALIGNMENT - 1))
1066 {
1067 fprintf (stderr,
1068 "strcache2[%s]: missaligned entry %p\nstring: %p=%s\n",
1069 cache->name, (void *)entry, (void *)str, str);
1070 return -1;
1071 }
1072
1073 end = memchr (str, '\0', entry->length + 1);
1074 length = end - str;
1075 if (length != entry->length)
1076 {
1077 fprintf (stderr,
1078 "strcache2[%s]: corrupt entry %p, length: %u, expected %u;\nstring: %s\n",
1079 cache->name, (void *)entry, length, entry->length, str);
1080 return -1;
1081 }
1082
1083 hash = cache->case_insensitive
1084 ? strcache2_case_insensitive_hash (str, entry->length)
1085 : strcache2_case_sensitive_hash (str, entry->length);
1086 if (hash != entry->hash)
1087 {
1088 fprintf (stderr,
1089 "strcache2[%s]: corrupt entry %p, hash: %x, expected %x;\nstring: %s\n",
1090 cache->name, (void *)entry, hash, entry->hash, str);
1091 return -1;
1092 }
1093
1094 return 0;
1095}
1096
1097
1098/* Calculates the case sensitive hash values of the string.
1099 The first hash is returned, the other is put at HASH2P. */
1100unsigned int strcache2_hash_str (const char *str, unsigned int length, unsigned int *hash2p)
1101{
1102 *hash2p = 1;
1103 return strcache2_case_sensitive_hash (str, length);
1104}
1105
1106/* Calculates the case insensitive hash values of the string.
1107 The first hash is returned, the other is put at HASH2P. */
1108unsigned int strcache2_hash_istr (const char *str, unsigned int length, unsigned int *hash2p)
1109{
1110 *hash2p = 1;
1111 return strcache2_case_insensitive_hash (str, length);
1112}
1113
1114
1115
1116/* Initalizes a new cache. */
1117void
1118strcache2_init (struct strcache2 *cache, const char *name, unsigned int size,
1119 unsigned int def_seg_size, int case_insensitive, int thread_safe)
1120{
1121 unsigned hash_shift;
1122 assert (!thread_safe);
1123
1124 /* calc the size as a power of two */
1125 if (!size)
1126 hash_shift = STRCACHE2_HASH_SHIFT;
1127 else
1128 {
1129 assert (size <= (~0U / 2 + 1));
1130 for (hash_shift = 8; (1U << hash_shift) < size; hash_shift++)
1131 /* nothing */;
1132 }
1133
1134 /* adjust the default segment size */
1135 if (!def_seg_size)
1136 def_seg_size = STRCACHE2_SEG_SIZE;
1137 else if (def_seg_size < sizeof (struct strcache2_seg) * 10)
1138 def_seg_size = sizeof (struct strcache2_seg) * 10;
1139 else if ((def_seg_size & 0xfff) < 0xf00)
1140 def_seg_size = ((def_seg_size + 0xfff) & ~0xfffU);
1141
1142
1143 /* init the structure. */
1144 cache->case_insensitive = case_insensitive;
1145#ifdef STRCACHE2_USE_MASK
1146 cache->hash_mask = (1U << hash_shift) - 1U;
1147#else
1148 cache->hash_div = strcache2_find_prime(hash_shift);
1149#endif
1150 cache->count = 0;
1151 cache->collision_count = 0;
1152 cache->lookup_count = 0;
1153 cache->collision_1st_count = 0;
1154 cache->collision_2nd_count = 0;
1155 cache->collision_3rd_count = 0;
1156 cache->rehash_count = (1U << hash_shift) / 4 * 3; /* rehash at 75% */
1157 cache->init_size = 1U << hash_shift;
1158 cache->hash_size = 1U << hash_shift;
1159 cache->def_seg_size = def_seg_size;
1160 cache->lock = NULL;
1161 cache->name = name;
1162
1163 /* allocate the hash table and first segment. */
1164 cache->hash_tab = (struct strcache2_entry **)
1165 xmalloc (cache->init_size * sizeof (struct strcache2_entry *));
1166 memset (cache->hash_tab, '\0', cache->init_size * sizeof (struct strcache2_entry *));
1167 strcache2_new_seg (cache, 0);
1168
1169 /* link it */
1170 cache->next = strcache_head;
1171 strcache_head = cache;
1172}
1173
1174
1175/* Terminates a string cache, freeing all memory and other
1176 associated resources. */
1177void
1178strcache2_term (struct strcache2 *cache)
1179{
1180 /* unlink it */
1181 if (strcache_head == cache)
1182 strcache_head = cache->next;
1183 else
1184 {
1185 struct strcache2 *prev = strcache_head;
1186 while (prev->next != cache)
1187 prev = prev->next;
1188 assert (prev);
1189 prev->next = cache->next;
1190 }
1191
1192 /* free the memory segments */
1193 do
1194 {
1195 void *free_it = cache->seg_head;
1196 cache->seg_head = cache->seg_head->next;
1197 free (free_it);
1198 }
1199 while (cache->seg_head);
1200
1201 /* free the hash and clear the structure. */
1202 free (cache->hash_tab);
1203 memset (cache, '\0', sizeof (struct strcache2));
1204}
1205
1206/* Print statistics a string cache. */
1207void
1208strcache2_print_stats (struct strcache2 *cache, const char *prefix)
1209{
1210 unsigned int seg_count = 0;
1211 unsigned long seg_total_bytes = 0;
1212 unsigned long seg_avg_bytes;
1213 unsigned long seg_avail_bytes = 0;
1214 unsigned long seg_max_bytes = 0;
1215 struct strcache2_seg *seg;
1216 unsigned int str_count = 0;
1217 unsigned long str_total_len = 0;
1218 unsigned int str_avg_len;
1219 unsigned int str_min_len = ~0U;
1220 unsigned int str_max_len = 0;
1221 unsigned int idx;
1222 unsigned int rehashes;
1223 unsigned int chain_depths[32];
1224
1225 printf (_("\n%s strcache2: %s\n"), prefix, cache->name);
1226
1227 /* Segment statistics. */
1228 for (seg = cache->seg_head; seg; seg = seg->next)
1229 {
1230 seg_count++;
1231 seg_total_bytes += seg->size;
1232 seg_avail_bytes += seg->avail;
1233 if (seg->size > seg_max_bytes)
1234 seg_max_bytes = seg->size;
1235 }
1236 seg_avg_bytes = seg_total_bytes / seg_count;
1237 printf (_("%s %u segments: total = %lu / max = %lu / avg = %lu / def = %u avail = %lu\n"),
1238 prefix, seg_count, seg_total_bytes, seg_max_bytes, seg_avg_bytes,
1239 cache->def_seg_size, seg_avail_bytes);
1240
1241 /* String statistics. */
1242 memset (chain_depths, '\0', sizeof (chain_depths));
1243 idx = cache->hash_size;
1244 while (idx-- > 0)
1245 {
1246 struct strcache2_entry const *entry = cache->hash_tab[idx];
1247 unsigned int depth = 0;
1248 for (; entry != 0; entry = entry->next, depth++)
1249 {
1250 unsigned int length = entry->length;
1251 str_total_len += length;
1252 if (length > str_max_len)
1253 str_max_len = length;
1254 if (length < str_min_len)
1255 str_min_len = length;
1256 str_count++;
1257 }
1258 chain_depths[depth >= 32 ? 31 : depth]++;
1259 }
1260 str_avg_len = cache->count ? str_total_len / cache->count : 0;
1261 printf (_("%s %u strings: total len = %lu / max = %u / avg = %u / min = %u\n"),
1262 prefix, cache->count, str_total_len, str_max_len, str_avg_len, str_min_len);
1263 if (str_count != cache->count)
1264 printf (_("%s string count mismatch! cache->count=%u, actual count is %u\n"), prefix,
1265 cache->count, str_count);
1266
1267 /* Hash statistics. */
1268 idx = cache->init_size;
1269 rehashes = 0;
1270 while (idx < cache->hash_size)
1271 {
1272 idx *= 2;
1273 rehashes++;
1274 }
1275
1276#ifdef STRCACHE2_USE_MASK
1277 printf (_("%s hash size = %u mask = %#x rehashed %u times"),
1278 prefix, cache->hash_size, cache->hash_mask, rehashes);
1279#else
1280 printf (_("%s hash size = %u div = %#x rehashed %u times"),
1281 prefix, cache->hash_size, cache->hash_div, rehashes);
1282#endif
1283 if (cache->lookup_count)
1284 printf (_("%s lookups = %lu\n"
1285 "%s hash collisions 1st = %lu (%u%%) 2nd = %lu (%u%%) 3rd = %lu (%u%%)"),
1286 prefix, cache->lookup_count,
1287 prefix,
1288 cache->collision_1st_count, (unsigned int)((100.0 * cache->collision_1st_count) / cache->lookup_count),
1289 cache->collision_2nd_count, (unsigned int)((100.0 * cache->collision_2nd_count) / cache->lookup_count),
1290 cache->collision_3rd_count, (unsigned int)((100.0 * cache->collision_3rd_count) / cache->lookup_count));
1291 printf (_("\n%s hash insert collisions = %u (%u%%)\n"),
1292 prefix, cache->collision_count,(unsigned int)((100.0 * cache->collision_count) / cache->count));
1293 printf (_("%s %5u (%u%%) empty hash table slots\n"),
1294 prefix, chain_depths[0], (unsigned int)((100.0 * chain_depths[0]) / cache->hash_size));
1295 printf (_("%s %5u (%u%%) occupied hash table slots\n"),
1296 prefix, chain_depths[1], (unsigned int)((100.0 * chain_depths[1]) / cache->hash_size));
1297 for (idx = 2; idx < 32; idx++)
1298 {
1299 unsigned strs_at_this_depth = chain_depths[idx];
1300 unsigned i;
1301 for (i = idx + 1; i < 32; i++)
1302 strs_at_this_depth += chain_depths[i];
1303 if (strs_at_this_depth)
1304 printf (_("%s %5u (%2u%%) with %u string%s chained on; %5u (%2u%%) strings at depth %u.\n"),
1305 prefix, chain_depths[idx], (unsigned int)((100.0 * chain_depths[idx]) / (cache->count - cache->collision_count)),
1306 idx - 1, idx == 2 ? " " : "s",
1307 strs_at_this_depth, (unsigned int)((100.0 * strs_at_this_depth) / cache->count), idx - 1);
1308 }
1309}
1310
1311/* Print statistics for all string caches. */
1312void
1313strcache2_print_stats_all (const char *prefix)
1314{
1315 struct strcache2 *cur;
1316 for (cur = strcache_head; cur; cur = cur->next)
1317 strcache2_print_stats (cur, prefix);
1318}
1319
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette