$treeview $search $mathjax $extrastylesheet
librsync
2.0.2
$projectbrief
|
$projectbrief
|
$searchbox |
00001 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- 00002 * 00003 * hashtable.h -- a generic open addressing hashtable. 00004 * 00005 * Copyright (C) 2003 by Donovan Baarda <abo@minkirri.apana.org.au> 00006 * 00007 * This program is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU Lesser General Public License as published by 00009 * the Free Software Foundation; either version 2.1 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * This program is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ 00020 #ifndef _HASHTABLE_H_ 00021 # define _HASHTABLE_H_ 00022 00023 # include <assert.h> 00024 # include <stdlib.h> 00025 00026 /** \file hashtable.h A generic open addressing hashtable. 00027 * 00028 * This is a minimal hashtable containing pointers to arbitrary entries with 00029 * configurable hashtable size and support for custom hash() and cmp() methods. 00030 * The cmp() method can either be a simple comparison between two keys, or can 00031 * be against a special match object containing additional mutable state. This 00032 * allows for things like deferred and cached evaluation of costly comparison 00033 * data. The hash() function doesn't need to avoid clustering behaviour. 00034 * 00035 * It uses open addressing with quadratic probing for collisions. The 00036 * MurmurHash3 finalization function is used on the hash() output to avoid 00037 * clustering. There is no support for removing entries, only adding them. 00038 * Multiple entries with the same key can be added, and you can use a fancy 00039 * cmp() function to find particular entries by more than just their key. There 00040 * is an iterator for iterating through all entries in the hashtable. There are 00041 * optional hashtable_find() find/match/hashcmp/entrycmp stats counters that 00042 * can be disabled by defining HASHTABLE_NSTATS. 00043 * 00044 * The types and methods of the hashtable and its contents are specified by 00045 * using \#define parameters set to their basenames (the prefixes for the *_t 00046 * type and *_func() methods) before doing \#include "hashtable.h". This 00047 * produces static inline type-safe methods that are either application 00048 * optimized for speed or wrappers around void* implementation methods for 00049 * compactness. 00050 * 00051 * \param ENTRY - the entry type basename. 00052 * 00053 * \param KEY - optional key type basename (default: ENTRY). 00054 * 00055 * \param MATCH - optional match type basename (default: KEY). 00056 * 00057 * \param NAME - optional hashtable type basename (default: ENTRY_hashtable). 00058 * 00059 * Example: \code 00060 * typedef ... mykey_t; 00061 * int mykey_hash(const mykey_t *e); 00062 * int mykey_cmp(mykey_t *e, const mykey_t *o); 00063 * 00064 * typedef struct myentry { 00065 * mykey_t key; // Inherit from mykey_t. 00066 * ...extra entry value data... 00067 * } myentry_t; 00068 * void myentry_init(myentry_t *e, ...); 00069 * 00070 * #define ENTRY myentry 00071 * #define KEY mykey 00072 * #include "hashtable.h" 00073 * 00074 * hashtable_t *t; 00075 * myentry_t entries[300]; 00076 * mykey_t k; 00077 * myentry_t *e; 00078 * 00079 * t = myentry_hashtable_new(300); 00080 * myentry_init(&entries[5], ...); 00081 * myentry_hashtable_add(t, &entries[5]); 00082 * k = ...; 00083 * e = myentry_hashtable_find(t, &k); 00084 * 00085 * hashtable_iter_t i; 00086 * for (e = myentry_hashtable_iter(&i, t); e != NULL; 00087 * e = myentry_hashtable_next(&i)) 00088 * ... 00089 * 00090 * myentry_hashtable_free(t); 00091 * \endcode 00092 * 00093 * The mykey_hash() and mykey_cmp() fuctions will typically take pointers to 00094 * mykey/myentry instances the same as the pointers stored in the hashtable. 00095 * However it is also possible for them to take "match objects" that are a 00096 * "subclass" of the entry type that contain additional state for complicated 00097 * comparision operations. 00098 * 00099 * Example: \code 00100 * typedef struct mymatch { 00101 * mykey_t key; // Inherit from mykey_t; 00102 * ...extra match criteria and state data... 00103 * } mymatch_t; 00104 * int mymatch_cmp(mymatch_t *m, const myentry_t *e); 00105 * 00106 * #define ENTRY myentry 00107 * #define KEY mykey 00108 * #define MATCH mymatch 00109 * #include "hashtable.h" 00110 * 00111 * ... 00112 * mymatch_t m; 00113 * 00114 * t = myentry_hashtable_new(300); 00115 * ... 00116 * m = ...; 00117 * e = myentry_hashtable_find(t, &m); 00118 * \endcode 00119 * 00120 * The mymatch_cmp() function is only called for finding hashtable entries and 00121 * can mutate the mymatch_t object for doing things like deferred and cached 00122 * evaluation of expensive match data. It can also access the whole myentry_t 00123 * object to match against more than just the key. */ 00124 00125 /** The hashtable type. */ 00126 typedef struct hashtable { 00127 int size; /**< Size of allocated hashtable. */ 00128 int count; /**< Number of entries in hashtable. */ 00129 # ifndef HASHTABLE_NSTATS 00130 /* The following are for accumulating hashtable_find() stats. */ 00131 long find_count; /**< The count of finds tried. */ 00132 long match_count; /**< The count of matches found. */ 00133 long hashcmp_count; /**< The count of hash compares done. */ 00134 long entrycmp_count; /**< The count of entry compares done. */ 00135 # endif 00136 void **etable; /**< Table of pointers to entries. */ 00137 unsigned ktable[]; /**< Table of hash keys. */ 00138 } hashtable_t; 00139 00140 /** The hashtable iterator type. */ 00141 typedef struct hashtable_iter { 00142 hashtable_t *htable; /**< The hashtable to iterate over. */ 00143 int index; /**< The index to scan from next. */ 00144 } hashtable_iter_t; 00145 00146 /* void* implementations for the type-safe static inline wrappers below. */ 00147 hashtable_t *_hashtable_new(int size); 00148 void _hashtable_free(hashtable_t *t); 00149 void *_hashtable_iter(hashtable_iter_t *i, hashtable_t *t); 00150 void *_hashtable_next(hashtable_iter_t *i); 00151 00152 /** MurmurHash3 finalization mix function. */ 00153 static inline unsigned mix32(unsigned int h) 00154 { 00155 h ^= h >> 16; 00156 h *= 0x85ebca6b; 00157 h ^= h >> 13; 00158 h *= 0xc2b2ae35; 00159 h ^= h >> 16; 00160 return h; 00161 } 00162 00163 #endif /* _HASHTABLE_H_ */ 00164 00165 /* If ENTRY is defined, define type-dependent static inline methods. */ 00166 #ifdef ENTRY 00167 00168 # define _JOIN2(x, y) x##y 00169 # define _JOIN(x, y) _JOIN2(x, y) 00170 00171 # ifndef KEY 00172 # define KEY ENTRY 00173 # endif 00174 00175 # ifndef MATCH 00176 # define MATCH KEY 00177 # endif 00178 00179 # ifndef NAME 00180 # define NAME _JOIN(ENTRY, _hashtable) 00181 # endif 00182 00183 # define ENTRY_T _JOIN(ENTRY, _t) /**< The entry type. */ 00184 # define KEY_T _JOIN(KEY, _t) /**< The key type. */ 00185 # define MATCH_T _JOIN(MATCH, _t) /**< The match type. */ 00186 /** The key hash(k) method. */ 00187 # define KEY_HASH(k) _JOIN(KEY, _hash)(k) 00188 /** The match cmp(m, e) method. */ 00189 # define MATCH_CMP(m, e) _JOIN(MATCH, _cmp)(m, e) 00190 # define _FUNC(f) _JOIN(NAME, f) 00191 00192 /* Loop macro for probing table t for key k, setting hk to the hash for k 00193 reserving zero for empty buckets, and iterating with index i and entry hash 00194 h, terminating at an empty bucket. */ 00195 # define _for_probe(t, k, hk, i, h) \ 00196 const unsigned mask = t->size - 1;\ 00197 unsigned hk = KEY_HASH((KEY_T *)k), i, s, h;\ 00198 hk = hk ? hk : -1;\ 00199 for (i = mix32(hk) & mask, s = 0; (h = t->ktable[i]); i = (i + ++s) & mask) 00200 00201 /* Conditional macro for incrementing stats counters. */ 00202 # ifndef HASHTABLE_NSTATS 00203 # define _stats_inc(c) (c++) 00204 # else 00205 # define _stats_inc(c) 00206 # endif 00207 00208 /** Allocate and initialize a hashtable instance. 00209 * 00210 * The provided size is used as an indication of the number of entries you wish 00211 * to add, but the allocated size will probably be larger depending on the 00212 * implementation to enable optimisations or avoid degraded performance. It may 00213 * be possible to fill the table beyond the requested size, but performance can 00214 * start to degrade badly if it is over filled. 00215 * 00216 * \param size - The desired minimum size of the hash table. 00217 * 00218 * \return The initialized hashtable instance or NULL if it failed. */ 00219 static inline hashtable_t *_FUNC(_new) (int size) { 00220 return _hashtable_new(size); 00221 } 00222 00223 /** Destroy and free a hashtable instance. 00224 * 00225 * This will free the hashtable, but will not free the entries in the 00226 * hashtable. If you want to free the entries too, use a hashtable iterator to 00227 * free the the entries first. 00228 * 00229 * \param *t - The hashtable to destroy and free. */ 00230 static inline void _FUNC(_free) (hashtable_t *t) { 00231 _hashtable_free(t); 00232 } 00233 00234 /** Initialize hashtable stats counters. 00235 * 00236 * This will reset all the stats counters for the hashtable, 00237 * 00238 * \param *t - The hashtable to initializ stats for. */ 00239 static inline void _FUNC(_stats_init) (hashtable_t *t) { 00240 # ifndef HASHTABLE_NSTATS 00241 t->find_count = t->match_count = t->hashcmp_count = t->entrycmp_count = 0; 00242 # endif 00243 } 00244 00245 /** Add an entry to a hashtable. 00246 * 00247 * This doesn't use MATCH_CMP() or do any checks for existing copies or 00248 * instances, so it will add duplicates. If you want to avoid adding 00249 * duplicates, use hashtable_find() to check for existing entries first. 00250 * 00251 * \param *t - The hashtable to add to. 00252 * 00253 * \param *e - The entry object to add. 00254 * 00255 * \return The added entry, or NULL if the table is full. */ 00256 static inline ENTRY_T *_FUNC(_add) (hashtable_t *t, ENTRY_T * e) { 00257 assert(e != NULL); 00258 if (t->count + 1 == t->size) 00259 return NULL; 00260 _for_probe(t, e, he, i, h); 00261 t->count++; 00262 t->ktable[i] = he; 00263 return t->etable[i] = e; 00264 } 00265 00266 /** Find an entry in a hashtable. 00267 * 00268 * Uses MATCH_CMP() to find the first matching entry in the table in the same 00269 * hash() bucket. 00270 * 00271 * \param *t - The hashtable to search. 00272 * 00273 * \param *m - The key or match object to search for. 00274 * 00275 * \return The first found entry, or NULL if nothing was found. */ 00276 static inline ENTRY_T *_FUNC(_find) (hashtable_t *t, MATCH_T * m) { 00277 assert(m != NULL); 00278 ENTRY_T *e; 00279 00280 _stats_inc(t->find_count); 00281 _for_probe(t, m, hm, i, he) { 00282 _stats_inc(t->hashcmp_count); 00283 if (hm == he) { 00284 _stats_inc(t->entrycmp_count); 00285 if (!MATCH_CMP(m, e = t->etable[i])) { 00286 _stats_inc(t->match_count); 00287 return e; 00288 } 00289 } 00290 } 00291 return NULL; 00292 } 00293 00294 /** Initialize a hashtable_iter_t and return the first entry. 00295 * 00296 * This works together with hashtable_next() for iterating through all entries 00297 * in a hashtable. 00298 * 00299 * Example: \code 00300 * for (e = hashtable_iter(&i, t); e != NULL; e = hashtable_next(&i)) 00301 * ... 00302 * \endcode 00303 * 00304 * \param *i - the hashtable iterator to initialize. 00305 * 00306 * \param *t - the hashtable to iterate over. 00307 * 00308 * \return The first entry or NULL if the hashtable is empty. */ 00309 static inline ENTRY_T *_FUNC(_iter) (hashtable_iter_t *i, hashtable_t *t) { 00310 return _hashtable_iter(i, t); 00311 } 00312 00313 /** Get the next entry from a hashtable iterator or NULL when finished. 00314 * 00315 * \param *i - the hashtable iterator to use. 00316 * 00317 * \return The next entry or NULL if the iterator is finished. */ 00318 static inline ENTRY_T *_FUNC(_next) (hashtable_iter_t *i) { 00319 return _hashtable_next(i); 00320 } 00321 00322 # undef ENTRY 00323 # undef KEY 00324 # undef MATCH 00325 # undef NAME 00326 # undef ENTRY_T 00327 # undef KEY_T 00328 # undef MATCH_T 00329 # undef KEY_HASH 00330 # undef MATCH_CMP 00331 # undef _FUNC 00332 #endif /* ENTRY */