VirtualBox

Changeset 1920 in kBuild


Ignore:
Timestamp:
Oct 24, 2008 12:00:27 AM (16 years ago)
Author:
bird
Message:

strcache2: don't hash strings that aren't small (experimental).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/kmk/strcache2.c

    r1917 r1920  
    7777#endif
    7878
     79# if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
     80 || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
     81#  define strcache2_get_unaligned_16bits(ptr)   ( *((const uint16_t *)(ptr)))
     82# else
     83   /* (endian doesn't matter) */
     84#  define strcache2_get_unaligned_16bits(ptr)   (   (((const uint8_t *)(ptr))[0] << 8) \
     85                                                  | (((const uint8_t *)(ptr))[1]) )
     86# endif
     87
    7988
    8089/*******************************************************************************
     
    117126}
    118127
     128/* The following is a bit experiment. It produces longer chains, i.e. worse
     129   distribution of the strings in the table, however the actual make
     130   performances is better (<time).  The explanation is probably that the
     131   collisions only really increase for entries that aren't looked up that
     132   much and that it actually improoves the situation for those that is. Or
     133   that we spend so much less time hashing that it makes up (and more) for
     134   the pentalty we suffer from the longer chains and worse distribution.
     135
     136   XXX: Check how this works out with different path lengths. I suspect it
     137        might depend on the length of PATH_ROOT and the depth of the files
     138        in the project as well. If it does, this might make matters worse
     139        for some and better for others which isn't very cool...  */
     140
     141#define BIG_HASH_SIZE   32
     142#define BIG_HASH_TAIL   12
     143#define BIG_HASH_HEAD   16
     144
     145#ifdef BIG_HASH_SIZE
     146/* long string: hash head and tail, drop the middle. */
    119147MY_INLINE unsigned int
    120 strcache2_case_sensitive_hash_1 (const char *str, unsigned int len)
     148strcache2_case_sensitive_hash_big (const char *str, unsigned int len)
     149{
     150  uint32_t hash = len;
     151  uint32_t tmp;
     152  unsigned int head;
     153
     154  /* head BIG_HASH_HEAD bytes */
     155  head = (BIG_HASH_HEAD >> 2);
     156  while (head-- > 0)
     157    {
     158      hash += strcache2_get_unaligned_16bits (str);
     159      tmp   = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
     160      hash  = (hash << 16) ^ tmp;
     161      str  += 2 * sizeof (uint16_t);
     162      hash += hash >> 11;
     163    }
     164
     165  /* tail BIG_HASH_TAIL bytes (minus the odd ones) */
     166  str += (len - BIG_HASH_HEAD - BIG_HASH_TAIL) & ~3U;
     167  head = (BIG_HASH_TAIL >> 2);
     168  while (head-- > 0)
     169    {
     170      hash += strcache2_get_unaligned_16bits (str);
     171      tmp   = (strcache2_get_unaligned_16bits (str + 2) << 11) ^ hash;
     172      hash  = (hash << 16) ^ tmp;
     173      str  += 2 * sizeof (uint16_t);
     174      hash += hash >> 11;
     175    }
     176
     177  /* force "avalanching" of final 127 bits. */
     178  hash ^= hash << 3;
     179  hash += hash >> 5;
     180  hash ^= hash << 4;
     181  hash += hash >> 17;
     182  hash ^= hash << 25;
     183  hash += hash >> 6;
     184
     185  return hash;
     186}
     187#endif /* BIG_HASH_SIZE */
     188
     189MY_INLINE unsigned int
     190strcache2_case_sensitive_hash (const char *str, unsigned int len)
    121191{
    122192#if 1
     
    134204     FIXME: A path for well aligned data should be added to speed up
    135205            execution on alignment sensitive systems.  */
    136 
    137 # if defined(__amd64__) || defined(__x86_64__) || defined(__AMD64__) || defined(_M_X64) || defined(__amd64) \
    138  || defined(__i386__) || defined(__x86__) || defined(__X86__) || defined(_M_IX86) || defined(__i386)
    139 #  define strcache2_get_unaligned_16bits(ptr)   ( *((const uint16_t *)(ptr)))
    140 # else
    141    /* (endian doesn't matter) */
    142 #  define strcache2_get_unaligned_16bits(ptr)   (   (((const uint8_t *)(ptr))[0] << 8) \
    143                                                   | (((const uint8_t *)(ptr))[1]) )
     206  unsigned int rem;
     207  uint32_t hash;
     208  uint32_t tmp;
     209
     210  assert (sizeof (uint8_t) == sizeof (char));
     211
     212# ifdef BIG_HASH_SIZE
     213  /* long string? */
     214  if (len >= BIG_HASH_SIZE)
     215    return strcache2_case_sensitive_hash_big (str, len);
    144216# endif
    145   uint32_t hash = len;
    146   uint32_t tmp;
    147   int rem;
    148 
    149   assert (sizeof (uint8_t) == sizeof (char));
    150   if (len == 0)
    151     return 0;
    152 
    153   /* main loop, walking on 2 x uint16_t */
     217
     218  /* short string: main loop, walking on 2 x uint16_t */
     219  hash = len;
    154220  rem = len & 3;
    155221  len >>= 2;
     
    164230    }
    165231
    166   /* the remainer */
     232  /* the remainder */
    167233  switch (rem)
    168234    {
     
    290356
    291357MY_INLINE unsigned int
    292 strcache2_case_insensitive_hash_1 (const char *str, unsigned int len)
     358strcache2_case_insensitive_hash (const char *str, unsigned int len)
    293359{
    294360  unsigned int hash = 0;
     
    684750{
    685751  struct strcache2_entry const *entry;
    686   unsigned int hash = strcache2_case_sensitive_hash_1 (str, length);
     752  unsigned int hash = strcache2_case_sensitive_hash (str, length);
    687753  unsigned int idx;
    688754
     
    733799
    734800  assert (!cache->case_insensitive);
    735   correct_hash = strcache2_case_sensitive_hash_1 (str, length);
     801  correct_hash = strcache2_case_sensitive_hash (str, length);
    736802  MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
    737803#endif /* NDEBUG */
     
    774840{
    775841  struct strcache2_entry const *entry;
    776   unsigned int hash = strcache2_case_sensitive_hash_1 (str, length);
     842  unsigned int hash = strcache2_case_sensitive_hash (str, length);
    777843  unsigned int idx;
    778844
     
    818884{
    819885  struct strcache2_entry const *entry;
    820   unsigned int hash = strcache2_case_insensitive_hash_1 (str, length);
     886  unsigned int hash = strcache2_case_insensitive_hash (str, length);
    821887  unsigned int idx;
    822888
     
    867933
    868934  assert (!cache->case_insensitive);
    869   correct_hash = strcache2_case_insensitive_hash_1 (str, length);
     935  correct_hash = strcache2_case_insensitive_hash (str, length);
    870936  MY_ASSERT_MSG (hash == correct_hash, ("%#x != %#x\n", hash, correct_hash));
    871937#endif /* NDEBUG */
     
    908974{
    909975  struct strcache2_entry const *entry;
    910   unsigned int hash = strcache2_case_insensitive_hash_1 (str, length);
     976  unsigned int hash = strcache2_case_insensitive_hash (str, length);
    911977  unsigned int idx;
    912978
     
    9931059
    9941060  hash = cache->case_insensitive
    995     ? strcache2_case_insensitive_hash_1 (str, entry->length)
    996     : strcache2_case_sensitive_hash_1 (str, entry->length);
     1061    ? strcache2_case_insensitive_hash (str, entry->length)
     1062    : strcache2_case_sensitive_hash (str, entry->length);
    9971063  if (hash != entry->hash)
    9981064    {
     
    10121078{
    10131079  *hash2p = 1;
    1014   return    strcache2_case_sensitive_hash_1 (str, length);
     1080  return    strcache2_case_sensitive_hash (str, length);
    10151081}
    10161082
     
    10201086{
    10211087  *hash2p = 1;
    1022   return    strcache2_case_insensitive_hash_1 (str, length);
     1088  return    strcache2_case_insensitive_hash (str, length);
    10231089}
    10241090
Note: See TracChangeset for help on using the changeset viewer.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette