$treeview $search $mathjax $extrastylesheet
librsync
2.3.0
$projectbrief
|
$projectbrief
|
$searchbox |
00001 /*= -*- c-basic-offset: 4; indent-tabs-mode: nil; -*- 00002 * 00003 * hashtable.h -- a generic open addressing hashtable. 00004 * 00005 * Copyright (C) 2003 by Donovan Baarda <abo@minkirri.apana.org.au> 00006 * 00007 * This program is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU Lesser General Public License as published by 00009 * the Free Software Foundation; either version 2.1 of the License, or 00010 * (at your option) any later version. 00011 * 00012 * This program is distributed in the hope that it will be useful, 00013 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 * GNU Lesser General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU Lesser General Public License 00018 * along with this program; if not, write to the Free Software 00019 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ 00020 #ifndef _HASHTABLE_H_ 00021 # define _HASHTABLE_H_ 00022 00023 # include <assert.h> 00024 # include <stdlib.h> 00025 00026 /** \file hashtable.h 00027 * A generic open addressing hashtable. 00028 * 00029 * This is a minimal hashtable containing pointers to arbitrary entries with 00030 * configurable hashtable size and support for custom hash() and cmp() methods. 00031 * The cmp() method can either be a simple comparison between two keys, or can 00032 * be against a special match object containing additional mutable state. This 00033 * allows for things like deferred and cached evaluation of costly comparison 00034 * data. The hash() function doesn't need to avoid clustering behaviour. 00035 * 00036 * It uses open addressing with quadratic probing for collisions. The 00037 * MurmurHash3 finalization function is optionally used on the hash() output to 00038 * avoid clustering and can be disabled by setting HASHTABLE_NMIX32. There is 00039 * no support for removing entries, only adding them. Multiple entries with the 00040 * same key can be added, and you can use a fancy cmp() function to find 00041 * particular entries by more than just their key. There is an iterator for 00042 * iterating through all entries in the hashtable. There are optional 00043 * NAME_find() find/match/hashcmp/entrycmp stats counters that can be disabled 00044 * by defining HASHTABLE_NSTATS. 00045 * 00046 * The types and methods of the hashtable and its contents are specified by 00047 * using \#define parameters set to their basenames (the prefixes for the *_t 00048 * type and *_func() methods) before doing \#include "hashtable.h". This 00049 * produces static inline type-safe methods that are either application 00050 * optimized for speed or wrappers around void* implementation methods for 00051 * compactness. 00052 * 00053 * \param ENTRY - the entry type basename. 00054 * 00055 * \param KEY - optional key type basename (default: ENTRY). 00056 * 00057 * \param MATCH - optional match type basename (default: KEY). 00058 * 00059 * \param NAME - optional hashtable type basename (default: ENTRY_hashtable). 00060 * 00061 * Example: \code 00062 * typedef ... mykey_t; 00063 * int mykey_hash(const mykey_t *e); 00064 * int mykey_cmp(mykey_t *e, const mykey_t *o); 00065 * 00066 * typedef struct myentry { 00067 * mykey_t key; // Inherit from mykey_t. 00068 * ...extra entry value data... 00069 * } myentry_t; 00070 * void myentry_init(myentry_t *e, ...); 00071 * 00072 * #define ENTRY myentry 00073 * #define KEY mykey 00074 * #include "hashtable.h" 00075 * 00076 * hashtable_t *t; 00077 * myentry_t entries[300]; 00078 * mykey_t k; 00079 * myentry_t *e; 00080 * 00081 * t = myentry_hashtable_new(300); 00082 * myentry_init(&entries[5], ...); 00083 * myentry_hashtable_add(t, &entries[5]); 00084 * k = ...; 00085 * e = myentry_hashtable_find(t, &k); 00086 * 00087 * int i; 00088 * for (e = myentry_hashtable_iter(t, &i); e != NULL; 00089 * e = myentry_hashtable_next(t, &i)) 00090 * ... 00091 * 00092 * myentry_hashtable_free(t); 00093 * \endcode 00094 * 00095 * The mykey_hash() and mykey_cmp() fuctions will typically take pointers to 00096 * mykey/myentry instances the same as the pointers stored in the hashtable. 00097 * However it is also possible for them to take "match objects" that are a 00098 * "subclass" of the entry type that contain additional state for complicated 00099 * comparision operations. 00100 * 00101 * Example: \code 00102 * typedef struct mymatch { 00103 * mykey_t key; // Inherit from mykey_t; 00104 * ...extra match criteria and state data... 00105 * } mymatch_t; 00106 * int mymatch_cmp(mymatch_t *m, const myentry_t *e); 00107 * 00108 * #define ENTRY myentry 00109 * #define KEY mykey 00110 * #define MATCH mymatch 00111 * #include "hashtable.h" 00112 * 00113 * ... 00114 * mymatch_t m; 00115 * 00116 * t = myentry_hashtable_new(300); 00117 * ... 00118 * m = ...; 00119 * e = myentry_hashtable_find(t, &m); 00120 * \endcode 00121 * 00122 * The mymatch_cmp() function is only called for finding hashtable entries and 00123 * can mutate the mymatch_t object for doing things like deferred and cached 00124 * evaluation of expensive match data. It can also access the whole myentry_t 00125 * object to match against more than just the key. */ 00126 00127 /** The hashtable type. */ 00128 typedef struct hashtable { 00129 int size; /**< Size of allocated hashtable. */ 00130 int count; /**< Number of entries in hashtable. */ 00131 # ifndef HASHTABLE_NSTATS 00132 /* The following are for accumulating NAME_find() stats. */ 00133 long find_count; /**< The count of finds tried. */ 00134 long match_count; /**< The count of matches found. */ 00135 long hashcmp_count; /**< The count of hash compares done. */ 00136 long entrycmp_count; /**< The count of entry compares done. */ 00137 # endif 00138 void **etable; /**< Table of pointers to entries. */ 00139 unsigned ktable[]; /**< Table of hash keys. */ 00140 } hashtable_t; 00141 00142 /* void* implementations for the type-safe static inline wrappers below. */ 00143 hashtable_t *_hashtable_new(int size); 00144 void _hashtable_free(hashtable_t *t); 00145 00146 /** MurmurHash3 finalization mix function. */ 00147 static inline unsigned mix32(unsigned int h) 00148 { 00149 h ^= h >> 16; 00150 h *= 0x85ebca6b; 00151 h ^= h >> 13; 00152 h *= 0xc2b2ae35; 00153 h ^= h >> 16; 00154 return h; 00155 } 00156 00157 #endif /* _HASHTABLE_H_ */ 00158 00159 /* If ENTRY is defined, define type-dependent static inline methods. */ 00160 #ifdef ENTRY 00161 00162 # define _JOIN2(x, y) x##y 00163 # define _JOIN(x, y) _JOIN2(x, y) 00164 00165 # ifndef KEY 00166 # define KEY ENTRY 00167 # endif 00168 00169 # ifndef MATCH 00170 # define MATCH KEY 00171 # endif 00172 00173 # ifndef NAME 00174 # define NAME _JOIN(ENTRY, _hashtable) 00175 # endif 00176 00177 # define ENTRY_t _JOIN(ENTRY, _t) /**< The entry type. */ 00178 # define KEY_t _JOIN(KEY, _t) /**< The key type. */ 00179 # define MATCH_t _JOIN(MATCH, _t) /**< The match type. */ 00180 # define KEY_hash _JOIN(KEY, _hash) /**< The key hash(k) method. */ 00181 # define MATCH_cmp _JOIN(MATCH, _cmp) /**< The match cmp(m, e) method. */ 00182 /* The names for all the hashtable methods. */ 00183 # define NAME_new _JOIN(NAME, _new) 00184 # define NAME_free _JOIN(NAME, _free) 00185 # define NAME_stats_init _JOIN(NAME, _stats_init) 00186 # define NAME_add _JOIN(NAME, _add) 00187 # define NAME_find _JOIN(NAME, _find) 00188 # define NAME_iter _JOIN(NAME, _iter) 00189 # define NAME_next _JOIN(NAME, _next) 00190 00191 /* Modified hash() with/without mix32(). */ 00192 # ifdef HASHTABLE_NMIX32 00193 # define _KEY_HASH(k) KEY_hash((KEY_t *)k) 00194 # else 00195 # define _KEY_HASH(k) mix32(KEY_hash((KEY_t *)k)) 00196 # endif 00197 00198 /* Loop macro for probing table t for key k, setting hk to the hash for k 00199 reserving zero for empty buckets, and iterating with index i and entry hash 00200 h, terminating at an empty bucket. */ 00201 # define _for_probe(t, k, hk, i, h) \ 00202 const unsigned mask = t->size - 1;\ 00203 unsigned hk = _KEY_HASH(k), i, s, h;\ 00204 if (hk == 0) hk = -1;\ 00205 for (i = hk & mask, s = 0; (h = t->ktable[i]); i = (i + ++s) & mask) 00206 00207 /* Conditional macro for incrementing stats counters. */ 00208 # ifndef HASHTABLE_NSTATS 00209 # define _stats_inc(c) (c++) 00210 # else 00211 # define _stats_inc(c) 00212 # endif 00213 00214 /** Allocate and initialize a hashtable instance. 00215 * 00216 * The provided size is used as an indication of the number of entries you wish 00217 * to add, but the allocated size will probably be larger depending on the 00218 * implementation to enable optimisations or avoid degraded performance. It may 00219 * be possible to fill the table beyond the requested size, but performance can 00220 * start to degrade badly if it is over filled. 00221 * 00222 * \param size - The desired minimum size of the hash table. 00223 * 00224 * \return The initialized hashtable instance or NULL if it failed. */ 00225 static inline hashtable_t *NAME_new(int size) 00226 { 00227 return _hashtable_new(size); 00228 } 00229 00230 /** Destroy and free a hashtable instance. 00231 * 00232 * This will free the hashtable, but will not free the entries in the 00233 * hashtable. If you want to free the entries too, use a hashtable iterator to 00234 * free the the entries first. 00235 * 00236 * \param *t - The hashtable to destroy and free. */ 00237 static inline void NAME_free(hashtable_t *t) 00238 { 00239 _hashtable_free(t); 00240 } 00241 00242 /** Initialize hashtable stats counters. 00243 * 00244 * This will reset all the stats counters for the hashtable, 00245 * 00246 * \param *t - The hashtable to initializ stats for. */ 00247 static inline void NAME_stats_init(hashtable_t *t) 00248 { 00249 # ifndef HASHTABLE_NSTATS 00250 t->find_count = t->match_count = t->hashcmp_count = t->entrycmp_count = 0; 00251 # endif 00252 } 00253 00254 /** Add an entry to a hashtable. 00255 * 00256 * This doesn't use MATCH_cmp() or do any checks for existing copies or 00257 * instances, so it will add duplicates. If you want to avoid adding 00258 * duplicates, use NAME_find() to check for existing entries first. 00259 * 00260 * \param *t - The hashtable to add to. 00261 * 00262 * \param *e - The entry object to add. 00263 * 00264 * \return The added entry, or NULL if the table is full. */ 00265 static inline ENTRY_t *NAME_add(hashtable_t *t, ENTRY_t *e) 00266 { 00267 assert(e != NULL); 00268 if (t->count + 1 == t->size) 00269 return NULL; 00270 _for_probe(t, e, he, i, h); 00271 t->count++; 00272 t->ktable[i] = he; 00273 return t->etable[i] = e; 00274 } 00275 00276 /** Find an entry in a hashtable. 00277 * 00278 * Uses MATCH_cmp() to find the first matching entry in the table in the same 00279 * hash() bucket. 00280 * 00281 * \param *t - The hashtable to search. 00282 * 00283 * \param *m - The key or match object to search for. 00284 * 00285 * \return The first found entry, or NULL if nothing was found. */ 00286 static inline ENTRY_t *NAME_find(hashtable_t *t, MATCH_t *m) 00287 { 00288 assert(m != NULL); 00289 ENTRY_t *e; 00290 00291 _stats_inc(t->find_count); 00292 _for_probe(t, m, hm, i, he) { 00293 _stats_inc(t->hashcmp_count); 00294 if (hm == he) { 00295 _stats_inc(t->entrycmp_count); 00296 if (!MATCH_cmp(m, e = t->etable[i])) { 00297 _stats_inc(t->match_count); 00298 return e; 00299 } 00300 } 00301 } 00302 return NULL; 00303 } 00304 00305 static inline ENTRY_t *NAME_next(hashtable_t *t, int *i); 00306 00307 /** Initialize a iteration and return the first entry. 00308 * 00309 * This works together with NAME_next() for iterating through all entries in a 00310 * hashtable. 00311 * 00312 * Example: \code 00313 * for (e = NAME_iter(t, &i); e != NULL; e = NAME_next(t, &i)) 00314 * ... 00315 * \endcode 00316 * 00317 * \param *t - the hashtable to iterate over. 00318 * 00319 * \param *i - the int iterator index to initialize. 00320 * 00321 * \return The first entry or NULL if the hashtable is empty. */ 00322 static inline ENTRY_t *NAME_iter(hashtable_t *t, int *i) 00323 { 00324 assert(t != NULL); 00325 assert(i != NULL); 00326 *i = 0; 00327 return NAME_next(t, i); 00328 } 00329 00330 /** Get the next entry from a hashtable iterator or NULL when finished. 00331 * 00332 * This works together with NAME_iter() for iterating through all entries in a 00333 * hashtable. 00334 * 00335 * \param *t - the hashtable to iterate over. 00336 * 00337 * \param *i - the int iterator index to use. 00338 * 00339 * \return The next entry or NULL if the iterator is finished. */ 00340 static inline ENTRY_t *NAME_next(hashtable_t *t, int *i) 00341 { 00342 assert(t != NULL); 00343 assert(i != NULL); 00344 ENTRY_t *e = NULL; 00345 00346 while ((*i < t->size) && !(e = t->etable[(*i)++])) ; 00347 return e; 00348 } 00349 00350 # undef ENTRY 00351 # undef KEY 00352 # undef MATCH 00353 # undef NAME 00354 # undef ENTRY_t 00355 # undef KEY_t 00356 # undef MATCH_t 00357 # undef KEY_hash 00358 # undef MATCH_cmp 00359 # undef NAME_new 00360 # undef NAME_free 00361 # undef NAME_stats_init 00362 # undef NAME_add 00363 # undef NAME_find 00364 # undef NAME_iter 00365 # undef NAME_next 00366 # undef _KEY_HASH 00367 #endif /* ENTRY */