From ba123c650193eb37ab152b5083b67364e8bacf30 Mon Sep 17 00:00:00 2001 From: Peter Verthez Date: Wed, 2 Jan 2002 12:22:16 +0000 Subject: [PATCH] Use hash implementation from kazlib. --- AUTHORS | 2 + gedcom/Makefile.am | 6 +- gedcom/encoding.c | 81 ++-- gedcom/encoding.h | 4 - gedcom/hash.c | 1042 ++++++++++++++++++++++++++++++++++++++++++++ gedcom/hash.h | 247 +++++++++++ 6 files changed, 1343 insertions(+), 39 deletions(-) create mode 100644 gedcom/hash.c create mode 100644 gedcom/hash.h diff --git a/AUTHORS b/AUTHORS index 1f55b05..2078e5f 100644 --- a/AUTHORS +++ b/AUTHORS @@ -12,6 +12,8 @@ Integrated external code and data: - Skeleton for gconv : Ulrich Drepper - Gedcom test files : Heiner Eichmann (http://heiner-eichmann.de/gedcom/gedcom.htm) + - Hash table code : Kaz Kylheku + (http://users.footprints.net/~kaz/kazlib.html) # $Id$ # $Name$ diff --git a/gedcom/Makefile.am b/gedcom/Makefile.am index 21d8b20..0fd201e 100644 --- a/gedcom/Makefile.am +++ b/gedcom/Makefile.am @@ -22,7 +22,8 @@ libgedcom_la_SOURCES = lex.gedcom_1byte_.c \ multilex.c \ encoding.c \ interface.c \ - date.c + date.c \ + hash.c libgedcom_la_LDFLAGS = -version-info $(LIBVERSION) libgedcom_la_LIBADD = calendar/libcalendar.la BUILT_SOURCES = lex.gedcom_1byte_.c \ @@ -37,7 +38,8 @@ noinst_HEADERS = encoding.h \ gedcom_internal.h \ interface.h \ multilex.h \ - date.h + date.h \ + hash.h EXTRA_DIST = gedcom.y \ gedcom_date.y \ gedcom_1byte.lex \ diff --git a/gedcom/encoding.c b/gedcom/encoding.c index 152da30..7e00f76 100644 --- a/gedcom/encoding.c +++ b/gedcom/encoding.c @@ -1,5 +1,5 @@ /* Conversion between encodings. - Copyright (C) 2001 The Genes Development Team + Copyright (C) 2001,2002 The Genes Development Team This file is part of the Gedcom parser library. Contributed by Peter Verthez , 2001. @@ -23,74 +23,84 @@ #include #include -#include #include #include #include #include "gedcom_internal.h" #include "encoding.h" +#include "hash.h" #define ENCODING_CONF_FILE "gedcom.enc" #define GCONV_SEARCH_PATH "GCONV_PATH" #define MAXBUF 255 +#define INIT_NR_ENCODINGS 10 static iconv_t cd_to_internal = (iconv_t) -1; -static void *encoding_mapping = NULL; static ENCODING the_enc = ONE_BYTE; - -struct node { - char *gedcom_name; - char *iconv_name; -}; +static hash_t *encodings = NULL; char* charwidth_string[] = { "1", "2_HILO", "2_LOHI" }; -int node_compare(const void *node1, const void *node2) +hnode_t *node_alloc(void *c __attribute__((unused))) { - return strcmp(((const struct node *) node1)->gedcom_name, - ((const struct node *) node2)->gedcom_name); + return malloc(sizeof *node_alloc(NULL)); +} + +void node_free(hnode_t *n, void *c __attribute__((unused))) +{ + free((void*)hnode_getkey(n)); + free(hnode_get(n)); + free(n); } void add_encoding(char *gedcom_n, char* charwidth, char *iconv_n) { - void **datum; - struct node *nodeptr = (struct node *) malloc(sizeof *nodeptr); - nodeptr->gedcom_name = (char *) malloc(strlen(gedcom_n) - + strlen(charwidth) + 3); - nodeptr->iconv_name = (char *) malloc(strlen(iconv_n) + 1); + char *key, *val; + + key = (char *) malloc(strlen(gedcom_n) + strlen(charwidth) + 3); + val = (char *) malloc(strlen(iconv_n) + 1); + /* sprintf is safe here (malloc'ed before) */ - sprintf(nodeptr->gedcom_name, "%s(%s)", gedcom_n, charwidth); - strcpy(nodeptr->iconv_name, iconv_n); - datum = tsearch(nodeptr, &encoding_mapping, node_compare); - if ((datum == NULL) || (*datum != nodeptr)) { + sprintf(key, "%s(%s)", gedcom_n, charwidth); + strcpy(val, iconv_n); + + if (hash_lookup(encodings, key)) { gedcom_warning(_("Duplicate entry found for encoding '%s', ignoring"), gedcom_n); } + else { + hash_alloc_insert(encodings, key, val); + } } char* get_encoding(char* gedcom_n, ENCODING enc) { - void **datum; - struct node search_node; - char *buffer; - buffer = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3); + char *key; + hnode_t *node; + + key = (char*)malloc(strlen(gedcom_n) + strlen(charwidth_string[enc]) + 3); /* sprintf is safe here (malloc'ed before) */ - sprintf(buffer, "%s(%s)", gedcom_n, charwidth_string[enc]); - search_node.gedcom_name = buffer; - datum = tfind(&search_node, &encoding_mapping, node_compare); - free(buffer); - if (datum == NULL) { - gedcom_error(_("No encoding defined for '%s'"), gedcom_n); - return NULL; + sprintf(key, "%s(%s)", gedcom_n, charwidth_string[enc]); + + node = hash_lookup(encodings, key); + free(key); + if (node) { + return hnode_get(node); } else { - return ((const struct node *) *datum)->iconv_name; + gedcom_error(_("No encoding defined for '%s'"), gedcom_n); + return NULL; } } +void cleanup_encodings() +{ + hash_free(encodings); +} + void init_encodings() { - if (encoding_mapping == NULL) { + if (encodings == NULL) { FILE *in; char buffer[MAXBUF + 1]; char gedcom_n[MAXBUF + 1]; @@ -98,6 +108,8 @@ void init_encodings() char iconv_n[MAXBUF + 1]; char *gconv_path; + atexit(cleanup_encodings); + /* Add gedcom data directory to gconv search path */ gconv_path = getenv(GCONV_SEARCH_PATH); if (gconv_path == NULL || strstr(gconv_path, PKGDATADIR) == NULL) { @@ -120,6 +132,9 @@ void init_encodings() gedcom_warning(_("Failed updating conversion module path")); } } + + encodings = hash_create(INIT_NR_ENCODINGS, NULL, NULL); + hash_set_allocator(encodings, node_alloc, node_free, NULL); /* Open gedcom configuration file and read */ in = fopen(ENCODING_CONF_FILE, "r"); diff --git a/gedcom/encoding.h b/gedcom/encoding.h index 732f532..6623e61 100644 --- a/gedcom/encoding.h +++ b/gedcom/encoding.h @@ -31,10 +31,6 @@ typedef enum _ENC { TWO_BYTE_LOHI = 2 } ENCODING; -/* All Unicode characters between U+0000 and U+FFFF can be encoded in - UTF-8 with 3 or less bytes */ -#define UTF_FACTOR 3 - int open_conv_to_internal(char* fromcode); void close_conv_to_internal(); char* to_internal(char* str, size_t len, diff --git a/gedcom/hash.c b/gedcom/hash.c new file mode 100644 index 0000000..69cd136 --- /dev/null +++ b/gedcom/hash.c @@ -0,0 +1,1042 @@ +/* This file is taken from kazlib 1.20 (see URL + http://users.footprints.net/~kaz/kazlib.html) + Only this initial comment has been added (plus two "unused" variable + attributes and a protection for the original CVS keywords). + The next comment gives the original copyright notice. +*/ + +/* + * Hash Table Data Type + * Copyright (C) 1997 Kaz Kylheku + * + * Free Software License: + * + * All rights are reserved by the author, with the following exceptions: + * Permission is granted to freely reproduce and distribute this software, + * possibly in exchange for a fee, provided that this copyright notice appears + * intact. Permission is also granted to adapt this software to produce + * derivative works, as long as the modified versions carry this copyright + * notice and additional notices stating that the work has been modified. + * This source code may be translated into executable form and incorporated + * into proprietary software; there is no requirement for such software to + * contain a copyright notice related to this source. + * + * $kkId: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $ + * $kkName: kazlib_1_20 $ + */ + +#include +#include +#include +#include +#define HASH_IMPLEMENTATION +#include "hash.h" + +#ifdef KAZLIB_RCSID +static const char rcsid[] = "$kkId: hash.c,v 1.36.2.11 2000/11/13 01:36:45 kaz Exp $"; +#endif + +#define INIT_BITS 6 +#define INIT_SIZE (1UL << (INIT_BITS)) /* must be power of two */ +#define INIT_MASK ((INIT_SIZE) - 1) + +#define next hash_next +#define key hash_key +#define data hash_data +#define hkey hash_hkey + +#define table hash_table +#define nchains hash_nchains +#define nodecount hash_nodecount +#define maxcount hash_maxcount +#define highmark hash_highmark +#define lowmark hash_lowmark +#define compare hash_compare +#define function hash_function +#define allocnode hash_allocnode +#define freenode hash_freenode +#define context hash_context +#define mask hash_mask +#define dynamic hash_dynamic + +#define table hash_table +#define chain hash_chain + +static hnode_t *hnode_alloc(void *context); +static void hnode_free(hnode_t *node, void *context); +static hash_val_t hash_fun_default(const void *key); +static int hash_comp_default(const void *key1, const void *key2); + +int hash_val_t_bit; + +/* + * Compute the number of bits in the hash_val_t type. We know that hash_val_t + * is an unsigned integral type. Thus the highest value it can hold is a + * Mersenne number (power of two, less one). We initialize a hash_val_t + * object with this value and then shift bits out one by one while counting. + * Notes: + * 1. HASH_VAL_T_MAX is a Mersenne number---one that is one less than a power + * of two. This means that its binary representation consists of all one + * bits, and hence ``val'' is initialized to all one bits. + * 2. While bits remain in val, we increment the bit count and shift it to the + * right, replacing the topmost bit by zero. + */ + +static void compute_bits(void) +{ + hash_val_t val = HASH_VAL_T_MAX; /* 1 */ + int bits = 0; + + while (val) { /* 2 */ + bits++; + val >>= 1; + } + + hash_val_t_bit = bits; +} + +/* + * Verify whether the given argument is a power of two. + */ + +static int is_power_of_two(hash_val_t arg) +{ + if (arg == 0) + return 0; + while ((arg & 1) == 0) + arg >>= 1; + return (arg == 1); +} + +/* + * Compute a shift amount from a given table size + */ + +static hash_val_t compute_mask(hashcount_t size) +{ + assert (is_power_of_two(size)); + assert (size >= 2); + + return size - 1; +} + +/* + * Initialize the table of pointers to null. + */ + +static void clear_table(hash_t *hash) +{ + hash_val_t i; + + for (i = 0; i < hash->nchains; i++) + hash->table[i] = NULL; +} + +/* + * Double the size of a dynamic table. This works as follows. Each chain splits + * into two adjacent chains. The shift amount increases by one, exposing an + * additional bit of each hashed key. For each node in the original chain, the + * value of this newly exposed bit will decide which of the two new chains will + * receive the node: if the bit is 1, the chain with the higher index will have + * the node, otherwise the lower chain will receive the node. In this manner, + * the hash table will continue to function exactly as before without having to + * rehash any of the keys. + * Notes: + * 1. Overflow check. + * 2. The new number of chains is twice the old number of chains. + * 3. The new mask is one bit wider than the previous, revealing a + * new bit in all hashed keys. + * 4. Allocate a new table of chain pointers that is twice as large as the + * previous one. + * 5. If the reallocation was successful, we perform the rest of the growth + * algorithm, otherwise we do nothing. + * 6. The exposed_bit variable holds a mask with which each hashed key can be + * AND-ed to test the value of its newly exposed bit. + * 7. Now loop over each chain in the table and sort its nodes into two + * chains based on the value of each node's newly exposed hash bit. + * 8. The low chain replaces the current chain. The high chain goes + * into the corresponding sister chain in the upper half of the table. + * 9. We have finished dealing with the chains and nodes. We now update + * the various bookeeping fields of the hash structure. + */ + +static void grow_table(hash_t *hash) +{ + hnode_t **newtable; + + assert (2 * hash->nchains > hash->nchains); /* 1 */ + + newtable = realloc(hash->table, + sizeof *newtable * hash->nchains * 2); /* 4 */ + + if (newtable) { /* 5 */ + hash_val_t mask = (hash->mask << 1) | 1; /* 3 */ + hash_val_t exposed_bit = mask ^ hash->mask; /* 6 */ + hash_val_t chain; + + assert (mask != hash->mask); + + for (chain = 0; chain < hash->nchains; chain++) { /* 7 */ + hnode_t *low_chain = 0, *high_chain = 0, *hptr, *next; + + for (hptr = newtable[chain]; hptr != 0; hptr = next) { + next = hptr->next; + + if (hptr->hkey & exposed_bit) { + hptr->next = high_chain; + high_chain = hptr; + } else { + hptr->next = low_chain; + low_chain = hptr; + } + } + + newtable[chain] = low_chain; /* 8 */ + newtable[chain + hash->nchains] = high_chain; + } + + hash->table = newtable; /* 9 */ + hash->mask = mask; + hash->nchains *= 2; + hash->lowmark *= 2; + hash->highmark *= 2; + } + assert (hash_verify(hash)); +} + +/* + * Cut a table size in half. This is done by folding together adjacent chains + * and populating the lower half of the table with these chains. The chains are + * simply spliced together. Once this is done, the whole table is reallocated + * to a smaller object. + * Notes: + * 1. It is illegal to have a hash table with one slot. This would mean that + * hash->shift is equal to hash_val_t_bit, an illegal shift value. + * Also, other things could go wrong, such as hash->lowmark becoming zero. + * 2. Looping over each pair of sister chains, the low_chain is set to + * point to the head node of the chain in the lower half of the table, + * and high_chain points to the head node of the sister in the upper half. + * 3. The intent here is to compute a pointer to the last node of the + * lower chain into the low_tail variable. If this chain is empty, + * low_tail ends up with a null value. + * 4. If the lower chain is not empty, we simply tack the upper chain onto it. + * If the upper chain is a null pointer, nothing happens. + * 5. Otherwise if the lower chain is empty but the upper one is not, + * If the low chain is empty, but the high chain is not, then the + * high chain is simply transferred to the lower half of the table. + * 6. Otherwise if both chains are empty, there is nothing to do. + * 7. All the chain pointers are in the lower half of the table now, so + * we reallocate it to a smaller object. This, of course, invalidates + * all pointer-to-pointers which reference into the table from the + * first node of each chain. + * 8. Though it's unlikely, the reallocation may fail. In this case we + * pretend that the table _was_ reallocated to a smaller object. + * 9. Finally, update the various table parameters to reflect the new size. + */ + +static void shrink_table(hash_t *hash) +{ + hash_val_t chain, nchains; + hnode_t **newtable, *low_tail, *low_chain, *high_chain; + + assert (hash->nchains >= 2); /* 1 */ + nchains = hash->nchains / 2; + + for (chain = 0; chain < nchains; chain++) { + low_chain = hash->table[chain]; /* 2 */ + high_chain = hash->table[chain + nchains]; + for (low_tail = low_chain; low_tail && low_tail->next; low_tail = low_tail->next) + ; /* 3 */ + if (low_chain != 0) /* 4 */ + low_tail->next = high_chain; + else if (high_chain != 0) /* 5 */ + hash->table[chain] = high_chain; + else + assert (hash->table[chain] == NULL); /* 6 */ + } + newtable = realloc(hash->table, + sizeof *newtable * nchains); /* 7 */ + if (newtable) /* 8 */ + hash->table = newtable; + hash->mask >>= 1; /* 9 */ + hash->nchains = nchains; + hash->lowmark /= 2; + hash->highmark /= 2; + assert (hash_verify(hash)); +} + + +/* + * Create a dynamic hash table. Both the hash table structure and the table + * itself are dynamically allocated. Furthermore, the table is extendible in + * that it will automatically grow as its load factor increases beyond a + * certain threshold. + * Notes: + * 1. If the number of bits in the hash_val_t type has not been computed yet, + * we do so here, because this is likely to be the first function that the + * user calls. + * 2. Allocate a hash table control structure. + * 3. If a hash table control structure is successfully allocated, we + * proceed to initialize it. Otherwise we return a null pointer. + * 4. We try to allocate the table of hash chains. + * 5. If we were able to allocate the hash chain table, we can finish + * initializing the hash structure and the table. Otherwise, we must + * backtrack by freeing the hash structure. + * 6. INIT_SIZE should be a power of two. The high and low marks are always set + * to be twice the table size and half the table size respectively. When the + * number of nodes in the table grows beyond the high size (beyond load + * factor 2), it will double in size to cut the load factor down to about + * about 1. If the table shrinks down to or beneath load factor 0.5, + * it will shrink, bringing the load up to about 1. However, the table + * will never shrink beneath INIT_SIZE even if it's emptied. + * 7. This indicates that the table is dynamically allocated and dynamically + * resized on the fly. A table that has this value set to zero is + * assumed to be statically allocated and will not be resized. + * 8. The table of chains must be properly reset to all null pointers. + */ + +hash_t *hash_create(hashcount_t maxcount, hash_comp_t compfun, + hash_fun_t hashfun) +{ + hash_t *hash; + + if (hash_val_t_bit == 0) /* 1 */ + compute_bits(); + + hash = malloc(sizeof *hash); /* 2 */ + + if (hash) { /* 3 */ + hash->table = malloc(sizeof *hash->table * INIT_SIZE); /* 4 */ + if (hash->table) { /* 5 */ + hash->nchains = INIT_SIZE; /* 6 */ + hash->highmark = INIT_SIZE * 2; + hash->lowmark = INIT_SIZE / 2; + hash->nodecount = 0; + hash->maxcount = maxcount; + hash->compare = compfun ? compfun : hash_comp_default; + hash->function = hashfun ? hashfun : hash_fun_default; + hash->allocnode = hnode_alloc; + hash->freenode = hnode_free; + hash->context = NULL; + hash->mask = INIT_MASK; + hash->dynamic = 1; /* 7 */ + clear_table(hash); /* 8 */ + assert (hash_verify(hash)); + return hash; + } + free(hash); + } + + return NULL; +} + +/* + * Select a different set of node allocator routines. + */ + +void hash_set_allocator(hash_t *hash, hnode_alloc_t al, + hnode_free_t fr, void *context) +{ + assert (hash_count(hash) == 0); + assert ((al == 0 && fr == 0) || (al != 0 && fr != 0)); + + hash->allocnode = al ? al : hnode_alloc; + hash->freenode = fr ? fr : hnode_free; + hash->context = context; +} + +/* + * Free every node in the hash using the hash->freenode() function pointer, and + * cause the hash to become empty. + */ + +void hash_free_nodes(hash_t *hash) +{ + hscan_t hs; + hnode_t *node; + hash_scan_begin(&hs, hash); + while ((node = hash_scan_next(&hs))) { + hash_scan_delete(hash, node); + hash->freenode(node, hash->context); + } + hash->nodecount = 0; + clear_table(hash); +} + +/* + * Obsolescent function for removing all nodes from a table, + * freeing them and then freeing the table all in one step. + */ + +void hash_free(hash_t *hash) +{ +#ifdef KAZLIB_OBSOLESCENT_DEBUG + assert ("call to obsolescent function hash_free()" && 0); +#endif + hash_free_nodes(hash); + hash_destroy(hash); +} + +/* + * Free a dynamic hash table structure. + */ + +void hash_destroy(hash_t *hash) +{ + assert (hash_val_t_bit != 0); + assert (hash_isempty(hash)); + free(hash->table); + free(hash); +} + +/* + * Initialize a user supplied hash structure. The user also supplies a table of + * chains which is assigned to the hash structure. The table is static---it + * will not grow or shrink. + * 1. See note 1. in hash_create(). + * 2. The user supplied array of pointers hopefully contains nchains nodes. + * 3. See note 7. in hash_create(). + * 4. We must dynamically compute the mask from the given power of two table + * size. + * 5. The user supplied table can't be assumed to contain null pointers, + * so we reset it here. + */ + +hash_t *hash_init(hash_t *hash, hashcount_t maxcount, + hash_comp_t compfun, hash_fun_t hashfun, hnode_t **table, + hashcount_t nchains) +{ + if (hash_val_t_bit == 0) /* 1 */ + compute_bits(); + + assert (is_power_of_two(nchains)); + + hash->table = table; /* 2 */ + hash->nchains = nchains; + hash->nodecount = 0; + hash->maxcount = maxcount; + hash->compare = compfun ? compfun : hash_comp_default; + hash->function = hashfun ? hashfun : hash_fun_default; + hash->dynamic = 0; /* 3 */ + hash->mask = compute_mask(nchains); /* 4 */ + clear_table(hash); /* 5 */ + + assert (hash_verify(hash)); + + return hash; +} + +/* + * Reset the hash scanner so that the next element retrieved by + * hash_scan_next() shall be the first element on the first non-empty chain. + * Notes: + * 1. Locate the first non empty chain. + * 2. If an empty chain is found, remember which one it is and set the next + * pointer to refer to its first element. + * 3. Otherwise if a chain is not found, set the next pointer to NULL + * so that hash_scan_next() shall indicate failure. + */ + +void hash_scan_begin(hscan_t *scan, hash_t *hash) +{ + hash_val_t nchains = hash->nchains; + hash_val_t chain; + + scan->table = hash; + + /* 1 */ + + for (chain = 0; chain < nchains && hash->table[chain] == 0; chain++) + ; + + if (chain < nchains) { /* 2 */ + scan->chain = chain; + scan->next = hash->table[chain]; + } else { /* 3 */ + scan->next = NULL; + } +} + +/* + * Retrieve the next node from the hash table, and update the pointer + * for the next invocation of hash_scan_next(). + * Notes: + * 1. Remember the next pointer in a temporary value so that it can be + * returned. + * 2. This assertion essentially checks whether the module has been properly + * initialized. The first point of interaction with the module should be + * either hash_create() or hash_init(), both of which set hash_val_t_bit to + * a non zero value. + * 3. If the next pointer we are returning is not NULL, then the user is + * allowed to call hash_scan_next() again. We prepare the new next pointer + * for that call right now. That way the user is allowed to delete the node + * we are about to return, since we will no longer be needing it to locate + * the next node. + * 4. If there is a next node in the chain (next->next), then that becomes the + * new next node, otherwise ... + * 5. We have exhausted the current chain, and must locate the next subsequent + * non-empty chain in the table. + * 6. If a non-empty chain is found, the first element of that chain becomes + * the new next node. Otherwise there is no new next node and we set the + * pointer to NULL so that the next time hash_scan_next() is called, a null + * pointer shall be immediately returned. + */ + + +hnode_t *hash_scan_next(hscan_t *scan) +{ + hnode_t *next = scan->next; /* 1 */ + hash_t *hash = scan->table; + hash_val_t chain = scan->chain + 1; + hash_val_t nchains = hash->nchains; + + assert (hash_val_t_bit != 0); /* 2 */ + + if (next) { /* 3 */ + if (next->next) { /* 4 */ + scan->next = next->next; + } else { + while (chain < nchains && hash->table[chain] == 0) /* 5 */ + chain++; + if (chain < nchains) { /* 6 */ + scan->chain = chain; + scan->next = hash->table[chain]; + } else { + scan->next = NULL; + } + } + } + return next; +} + +/* + * Insert a node into the hash table. + * Notes: + * 1. It's illegal to insert more than the maximum number of nodes. The client + * should verify that the hash table is not full before attempting an + * insertion. + * 2. The same key may not be inserted into a table twice. + * 3. If the table is dynamic and the load factor is already at >= 2, + * grow the table. + * 4. We take the bottom N bits of the hash value to derive the chain index, + * where N is the base 2 logarithm of the size of the hash table. + */ + +void hash_insert(hash_t *hash, hnode_t *node, const void *key) +{ + hash_val_t hkey, chain; + + assert (hash_val_t_bit != 0); + assert (node->next == NULL); + assert (hash->nodecount < hash->maxcount); /* 1 */ + assert (hash_lookup(hash, key) == NULL); /* 2 */ + + if (hash->dynamic && hash->nodecount >= hash->highmark) /* 3 */ + grow_table(hash); + + hkey = hash->function(key); + chain = hkey & hash->mask; /* 4 */ + + node->key = key; + node->hkey = hkey; + node->next = hash->table[chain]; + hash->table[chain] = node; + hash->nodecount++; + + assert (hash_verify(hash)); +} + +/* + * Find a node in the hash table and return a pointer to it. + * Notes: + * 1. We hash the key and keep the entire hash value. As an optimization, when + * we descend down the chain, we can compare hash values first and only if + * hash values match do we perform a full key comparison. + * 2. To locate the chain from among 2^N chains, we look at the lower N bits of + * the hash value by anding them with the current mask. + * 3. Looping through the chain, we compare the stored hash value inside each + * node against our computed hash. If they match, then we do a full + * comparison between the unhashed keys. If these match, we have located the + * entry. + */ + +hnode_t *hash_lookup(hash_t *hash, const void *key) +{ + hash_val_t hkey, chain; + hnode_t *nptr; + + hkey = hash->function(key); /* 1 */ + chain = hkey & hash->mask; /* 2 */ + + for (nptr = hash->table[chain]; nptr; nptr = nptr->next) { /* 3 */ + if (nptr->hkey == hkey && hash->compare(nptr->key, key) == 0) + return nptr; + } + + return NULL; +} + +/* + * Delete the given node from the hash table. Since the chains + * are singly linked, we must locate the start of the node's chain + * and traverse. + * Notes: + * 1. The node must belong to this hash table, and its key must not have + * been tampered with. + * 2. If this deletion will take the node count below the low mark, we + * shrink the table now. + * 3. Determine which chain the node belongs to, and fetch the pointer + * to the first node in this chain. + * 4. If the node being deleted is the first node in the chain, then + * simply update the chain head pointer. + * 5. Otherwise advance to the node's predecessor, and splice out + * by updating the predecessor's next pointer. + * 6. Indicate that the node is no longer in a hash table. + */ + +hnode_t *hash_delete(hash_t *hash, hnode_t *node) +{ + hash_val_t chain; + hnode_t *hptr; + + assert (hash_lookup(hash, node->key) == node); /* 1 */ + assert (hash_val_t_bit != 0); + + if (hash->dynamic && hash->nodecount <= hash->lowmark + && hash->nodecount > INIT_SIZE) + shrink_table(hash); /* 2 */ + + chain = node->hkey & hash->mask; /* 3 */ + hptr = hash->table[chain]; + + if (hptr == node) { /* 4 */ + hash->table[chain] = node->next; + } else { + while (hptr->next != node) { /* 5 */ + assert (hptr != 0); + hptr = hptr->next; + } + assert (hptr->next == node); + hptr->next = node->next; + } + + hash->nodecount--; + assert (hash_verify(hash)); + + node->next = NULL; /* 6 */ + return node; +} + +int hash_alloc_insert(hash_t *hash, const void *key, void *data) +{ + hnode_t *node = hash->allocnode(hash->context); + + if (node) { + hnode_init(node, data); + hash_insert(hash, node, key); + return 1; + } + return 0; +} + +void hash_delete_free(hash_t *hash, hnode_t *node) +{ + hash_delete(hash, node); + hash->freenode(node, hash->context); +} + +/* + * Exactly like hash_delete, except does not trigger table shrinkage. This is to be + * used from within a hash table scan operation. See notes for hash_delete. + */ + +hnode_t *hash_scan_delete(hash_t *hash, hnode_t *node) +{ + hash_val_t chain; + hnode_t *hptr; + + assert (hash_lookup(hash, node->key) == node); + assert (hash_val_t_bit != 0); + + chain = node->hkey & hash->mask; + hptr = hash->table[chain]; + + if (hptr == node) { + hash->table[chain] = node->next; + } else { + while (hptr->next != node) + hptr = hptr->next; + hptr->next = node->next; + } + + hash->nodecount--; + assert (hash_verify(hash)); + node->next = NULL; + + return node; +} + +/* + * Like hash_delete_free but based on hash_scan_delete. + */ + +void hash_scan_delfree(hash_t *hash, hnode_t *node) +{ + hash_scan_delete(hash, node); + hash->freenode(node, hash->context); +} + +/* + * Verify whether the given object is a valid hash table. This means + * Notes: + * 1. If the hash table is dynamic, verify whether the high and + * low expansion/shrinkage thresholds are powers of two. + * 2. Count all nodes in the table, and test each hash value + * to see whether it is correct for the node's chain. + */ + +int hash_verify(hash_t *hash) +{ + hashcount_t count = 0; + hash_val_t chain; + hnode_t *hptr; + + if (hash->dynamic) { /* 1 */ + if (hash->lowmark >= hash->highmark) + return 0; + if (!is_power_of_two(hash->highmark)) + return 0; + if (!is_power_of_two(hash->lowmark)) + return 0; + } + + for (chain = 0; chain < hash->nchains; chain++) { /* 2 */ + for (hptr = hash->table[chain]; hptr != 0; hptr = hptr->next) { + if ((hptr->hkey & hash->mask) != chain) + return 0; + count++; + } + } + + if (count != hash->nodecount) + return 0; + + return 1; +} + +/* + * Test whether the hash table is full and return 1 if this is true, + * 0 if it is false. + */ + +#undef hash_isfull +int hash_isfull(hash_t *hash) +{ + return hash->nodecount == hash->maxcount; +} + +/* + * Test whether the hash table is empty and return 1 if this is true, + * 0 if it is false. + */ + +#undef hash_isempty +int hash_isempty(hash_t *hash) +{ + return hash->nodecount == 0; +} + +static hnode_t *hnode_alloc(void *context __attribute__((unused))) +{ + return malloc(sizeof *hnode_alloc(NULL)); +} + +static void hnode_free(hnode_t *node, void *context __attribute__((unused))) +{ + free(node); +} + + +/* + * Create a hash table node dynamically and assign it the given data. + */ + +hnode_t *hnode_create(void *data) +{ + hnode_t *node = malloc(sizeof *node); + if (node) { + node->data = data; + node->next = NULL; + } + return node; +} + +/* + * Initialize a client-supplied node + */ + +hnode_t *hnode_init(hnode_t *hnode, void *data) +{ + hnode->data = data; + hnode->next = NULL; + return hnode; +} + +/* + * Destroy a dynamically allocated node. + */ + +void hnode_destroy(hnode_t *hnode) +{ + free(hnode); +} + +#undef hnode_put +void hnode_put(hnode_t *node, void *data) +{ + node->data = data; +} + +#undef hnode_get +void *hnode_get(hnode_t *node) +{ + return node->data; +} + +#undef hnode_getkey +const void *hnode_getkey(hnode_t *node) +{ + return node->key; +} + +#undef hash_count +hashcount_t hash_count(hash_t *hash) +{ + return hash->nodecount; +} + +#undef hash_size +hashcount_t hash_size(hash_t *hash) +{ + return hash->nchains; +} + +static hash_val_t hash_fun_default(const void *key) +{ + static unsigned long randbox[] = { + 0x49848f1bU, 0xe6255dbaU, 0x36da5bdcU, 0x47bf94e9U, + 0x8cbcce22U, 0x559fc06aU, 0xd268f536U, 0xe10af79aU, + 0xc1af4d69U, 0x1d2917b5U, 0xec4c304dU, 0x9ee5016cU, + 0x69232f74U, 0xfead7bb3U, 0xe9089ab6U, 0xf012f6aeU, + }; + + const unsigned char *str = key; + hash_val_t acc = 0; + + while (*str) { + acc ^= randbox[(*str + acc) & 0xf]; + acc = (acc << 1) | (acc >> 31); + acc &= 0xffffffffU; + acc ^= randbox[((*str++ >> 4) + acc) & 0xf]; + acc = (acc << 2) | (acc >> 30); + acc &= 0xffffffffU; + } + return acc; +} + +static int hash_comp_default(const void *key1, const void *key2) +{ + return strcmp(key1, key2); +} + +#ifdef KAZLIB_TEST_MAIN + +#include +#include +#include + +typedef char input_t[256]; + +static int tokenize(char *string, ...) +{ + char **tokptr; + va_list arglist; + int tokcount = 0; + + va_start(arglist, string); + tokptr = va_arg(arglist, char **); + while (tokptr) { + while (*string && isspace((unsigned char) *string)) + string++; + if (!*string) + break; + *tokptr = string; + while (*string && !isspace((unsigned char) *string)) + string++; + tokptr = va_arg(arglist, char **); + tokcount++; + if (!*string) + break; + *string++ = 0; + } + va_end(arglist); + + return tokcount; +} + +static char *dupstring(char *str) +{ + int sz = strlen(str) + 1; + char *new = malloc(sz); + if (new) + memcpy(new, str, sz); + return new; +} + +static hnode_t *new_node(void *c) +{ + static hnode_t few[5]; + static int count; + + if (count < 5) + return few + count++; + + return NULL; +} + +static void del_node(hnode_t *n, void *c) +{ +} + +int main(void) +{ + input_t in; + hash_t *h = hash_create(HASHCOUNT_T_MAX, 0, 0); + hnode_t *hn; + hscan_t hs; + char *tok1, *tok2, *val; + const char *key; + int prompt = 0; + + char *help = + "a add value to hash table\n" + "d delete value from hash table\n" + "l lookup value in hash table\n" + "n show size of hash table\n" + "c show number of entries\n" + "t dump whole hash table\n" + "+ increase hash table (private func)\n" + "- decrease hash table (private func)\n" + "b print hash_t_bit value\n" + "p turn prompt on\n" + "s switch to non-functioning allocator\n" + "q quit"; + + if (!h) + puts("hash_create failed"); + + for (;;) { + if (prompt) + putchar('>'); + fflush(stdout); + + if (!fgets(in, sizeof(input_t), stdin)) + break; + + switch(in[0]) { + case '?': + puts(help); + break; + case 'b': + printf("%d\n", hash_val_t_bit); + break; + case 'a': + if (tokenize(in+1, &tok1, &tok2, (char **) 0) != 2) { + puts("what?"); + break; + } + key = dupstring(tok1); + val = dupstring(tok2); + + if (!key || !val) { + puts("out of memory"); + free((void *) key); + free(val); + } + + if (!hash_alloc_insert(h, key, val)) { + puts("hash_alloc_insert failed"); + free((void *) key); + free(val); + break; + } + break; + case 'd': + if (tokenize(in+1, &tok1, (char **) 0) != 1) { + puts("what?"); + break; + } + hn = hash_lookup(h, tok1); + if (!hn) { + puts("hash_lookup failed"); + break; + } + val = hnode_get(hn); + key = hnode_getkey(hn); + hash_scan_delfree(h, hn); + free((void *) key); + free(val); + break; + case 'l': + if (tokenize(in+1, &tok1, (char **) 0) != 1) { + puts("what?"); + break; + } + hn = hash_lookup(h, tok1); + if (!hn) { + puts("hash_lookup failed"); + break; + } + val = hnode_get(hn); + puts(val); + break; + case 'n': + printf("%lu\n", (unsigned long) hash_size(h)); + break; + case 'c': + printf("%lu\n", (unsigned long) hash_count(h)); + break; + case 't': + hash_scan_begin(&hs, h); + while ((hn = hash_scan_next(&hs))) + printf("%s\t%s\n", (char*) hnode_getkey(hn), + (char*) hnode_get(hn)); + break; + case '+': + grow_table(h); /* private function */ + break; + case '-': + shrink_table(h); /* private function */ + break; + case 'q': + exit(0); + break; + case '\0': + break; + case 'p': + prompt = 1; + break; + case 's': + hash_set_allocator(h, new_node, del_node, NULL); + break; + default: + putchar('?'); + putchar('\n'); + break; + } + } + + return 0; +} + +#endif diff --git a/gedcom/hash.h b/gedcom/hash.h new file mode 100644 index 0000000..a425609 --- /dev/null +++ b/gedcom/hash.h @@ -0,0 +1,247 @@ +/* This file is taken from kazlib 1.20 (see URL + http://users.footprints.net/~kaz/kazlib.html) + Only this initial comment has been added and a protection for the original + CVS keywords. + The next comment gives the original copyright notice. +*/ + +/* + * Hash Table Data Type + * Copyright (C) 1997 Kaz Kylheku + * + * Free Software License: + * + * All rights are reserved by the author, with the following exceptions: + * Permission is granted to freely reproduce and distribute this software, + * possibly in exchange for a fee, provided that this copyright notice appears + * intact. Permission is also granted to adapt this software to produce + * derivative works, as long as the modified versions carry this copyright + * notice and additional notices stating that the work has been modified. + * This source code may be translated into executable form and incorporated + * into proprietary software; there is no requirement for such software to + * contain a copyright notice related to this source. + * + * $kkId: hash.h,v 1.22.2.7 2000/11/13 01:36:45 kaz Exp $ + * $kkName: kazlib_1_20 $ + */ + +#ifndef HASH_H +#define HASH_H + +#include +#ifdef KAZLIB_SIDEEFFECT_DEBUG +#include "sfx.h" +#endif + +/* + * Blurb for inclusion into C++ translation units + */ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned long hashcount_t; +#define HASHCOUNT_T_MAX ULONG_MAX + +typedef unsigned long hash_val_t; +#define HASH_VAL_T_MAX ULONG_MAX + +extern int hash_val_t_bit; + +#ifndef HASH_VAL_T_BIT +#define HASH_VAL_T_BIT ((int) hash_val_t_bit) +#endif + +/* + * Hash chain node structure. + * Notes: + * 1. This preprocessing directive is for debugging purposes. The effect is + * that if the preprocessor symbol KAZLIB_OPAQUE_DEBUG is defined prior to the + * inclusion of this header, then the structure shall be declared as having + * the single member int __OPAQUE__. This way, any attempts by the + * client code to violate the principles of information hiding (by accessing + * the structure directly) can be diagnosed at translation time. However, + * note the resulting compiled unit is not suitable for linking. + * 2. This is a pointer to the next node in the chain. In the last node of a + * chain, this pointer is null. + * 3. The key is a pointer to some user supplied data that contains a unique + * identifier for each hash node in a given table. The interpretation of + * the data is up to the user. When creating or initializing a hash table, + * the user must supply a pointer to a function for comparing two keys, + * and a pointer to a function for hashing a key into a numeric value. + * 4. The value is a user-supplied pointer to void which may refer to + * any data object. It is not interpreted in any way by the hashing + * module. + * 5. The hashed key is stored in each node so that we don't have to rehash + * each key when the table must grow or shrink. + */ + +typedef struct hnode_t { + #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) /* 1 */ + struct hnode_t *hash_next; /* 2 */ + const void *hash_key; /* 3 */ + void *hash_data; /* 4 */ + hash_val_t hash_hkey; /* 5 */ + #else + int hash_dummy; + #endif +} hnode_t; + +/* + * The comparison function pointer type. A comparison function takes two keys + * and produces a value of -1 if the left key is less than the right key, a + * value of 0 if the keys are equal, and a value of 1 if the left key is + * greater than the right key. + */ + +typedef int (*hash_comp_t)(const void *, const void *); + +/* + * The hashing function performs some computation on a key and produces an + * integral value of type hash_val_t based on that key. For best results, the + * function should have a good randomness properties in *all* significant bits + * over the set of keys that are being inserted into a given hash table. In + * particular, the most significant bits of hash_val_t are most significant to + * the hash module. Only as the hash table expands are less significant bits + * examined. Thus a function that has good distribution in its upper bits but + * not lower is preferrable to one that has poor distribution in the upper bits + * but not the lower ones. + */ + +typedef hash_val_t (*hash_fun_t)(const void *); + +/* + * allocator functions + */ + +typedef hnode_t *(*hnode_alloc_t)(void *); +typedef void (*hnode_free_t)(hnode_t *, void *); + +/* + * This is the hash table control structure. It keeps track of information + * about a hash table, as well as the hash table itself. + * Notes: + * 1. Pointer to the hash table proper. The table is an array of pointers to + * hash nodes (of type hnode_t). If the table is empty, every element of + * this table is a null pointer. A non-null entry points to the first + * element of a chain of nodes. + * 2. This member keeps track of the size of the hash table---that is, the + * number of chain pointers. + * 3. The count member maintains the number of elements that are presently + * in the hash table. + * 4. The maximum count is the greatest number of nodes that can populate this + * table. If the table contains this many nodes, no more can be inserted, + * and the hash_isfull() function returns true. + * 5. The high mark is a population threshold, measured as a number of nodes, + * which, if exceeded, will trigger a table expansion. Only dynamic hash + * tables are subject to this expansion. + * 6. The low mark is a minimum population threshold, measured as a number of + * nodes. If the table population drops below this value, a table shrinkage + * will occur. Only dynamic tables are subject to this reduction. No table + * will shrink beneath a certain absolute minimum number of nodes. + * 7. This is the a pointer to the hash table's comparison function. The + * function is set once at initialization or creation time. + * 8. Pointer to the table's hashing function, set once at creation or + * initialization time. + * 9. The current hash table mask. If the size of the hash table is 2^N, + * this value has its low N bits set to 1, and the others clear. It is used + * to select bits from the result of the hashing function to compute an + * index into the table. + * 10. A flag which indicates whether the table is to be dynamically resized. It + * is set to 1 in dynamically allocated tables, 0 in tables that are + * statically allocated. + */ + +typedef struct hash_t { + #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) + struct hnode_t **hash_table; /* 1 */ + hashcount_t hash_nchains; /* 2 */ + hashcount_t hash_nodecount; /* 3 */ + hashcount_t hash_maxcount; /* 4 */ + hashcount_t hash_highmark; /* 5 */ + hashcount_t hash_lowmark; /* 6 */ + hash_comp_t hash_compare; /* 7 */ + hash_fun_t hash_function; /* 8 */ + hnode_alloc_t hash_allocnode; + hnode_free_t hash_freenode; + void *hash_context; + hash_val_t hash_mask; /* 9 */ + int hash_dynamic; /* 10 */ + #else + int hash_dummy; + #endif +} hash_t; + +/* + * Hash scanner structure, used for traversals of the data structure. + * Notes: + * 1. Pointer to the hash table that is being traversed. + * 2. Reference to the current chain in the table being traversed (the chain + * that contains the next node that shall be retrieved). + * 3. Pointer to the node that will be retrieved by the subsequent call to + * hash_scan_next(). + */ + +typedef struct hscan_t { + #if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) + hash_t *hash_table; /* 1 */ + hash_val_t hash_chain; /* 2 */ + hnode_t *hash_next; /* 3 */ + #else + int hash_dummy; + #endif +} hscan_t; + +extern hash_t *hash_create(hashcount_t, hash_comp_t, hash_fun_t); +extern void hash_set_allocator(hash_t *, hnode_alloc_t, hnode_free_t, void *); +extern void hash_destroy(hash_t *); +extern void hash_free_nodes(hash_t *); +extern void hash_free(hash_t *); +extern hash_t *hash_init(hash_t *, hashcount_t, hash_comp_t, + hash_fun_t, hnode_t **, hashcount_t); +extern void hash_insert(hash_t *, hnode_t *, const void *); +extern hnode_t *hash_lookup(hash_t *, const void *); +extern hnode_t *hash_delete(hash_t *, hnode_t *); +extern int hash_alloc_insert(hash_t *, const void *, void *); +extern void hash_delete_free(hash_t *, hnode_t *); + +extern void hnode_put(hnode_t *, void *); +extern void *hnode_get(hnode_t *); +extern const void *hnode_getkey(hnode_t *); +extern hashcount_t hash_count(hash_t *); +extern hashcount_t hash_size(hash_t *); + +extern int hash_isfull(hash_t *); +extern int hash_isempty(hash_t *); + +extern void hash_scan_begin(hscan_t *, hash_t *); +extern hnode_t *hash_scan_next(hscan_t *); +extern hnode_t *hash_scan_delete(hash_t *, hnode_t *); +extern void hash_scan_delfree(hash_t *, hnode_t *); + +extern int hash_verify(hash_t *); + +extern hnode_t *hnode_create(void *); +extern hnode_t *hnode_init(hnode_t *, void *); +extern void hnode_destroy(hnode_t *); + +#if defined(HASH_IMPLEMENTATION) || !defined(KAZLIB_OPAQUE_DEBUG) +#ifdef KAZLIB_SIDEEFFECT_DEBUG +#define hash_isfull(H) (SFX_CHECK(H)->hash_nodecount == (H)->hash_maxcount) +#else +#define hash_isfull(H) ((H)->hash_nodecount == (H)->hash_maxcount) +#endif +#define hash_isempty(H) ((H)->hash_nodecount == 0) +#define hash_count(H) ((H)->hash_nodecount) +#define hash_size(H) ((H)->hash_nchains) +#define hnode_get(N) ((N)->hash_data) +#define hnode_getkey(N) ((N)->hash_key) +#define hnode_put(N, V) ((N)->hash_data = (V)) +#endif + +#ifdef __cplusplus +} +#endif + +#endif -- 2.30.2