diff options
Diffstat (limited to 'src/Common/libzip/zip_hash.c')
-rw-r--r-- | src/Common/libzip/zip_hash.c | 449 |
1 files changed, 296 insertions, 153 deletions
diff --git a/src/Common/libzip/zip_hash.c b/src/Common/libzip/zip_hash.c index 23f9708d..d3a954ec 100644 --- a/src/Common/libzip/zip_hash.c +++ b/src/Common/libzip/zip_hash.c @@ -1,9 +1,9 @@ /* zip_hash.c -- hash table string -> uint64 - Copyright (C) 2015-2016 Dieter Baron and Thomas Klausner + Copyright (C) 2015-2021 Dieter Baron and Thomas Klausner This file is part of libzip, a library to manipulate ZIP archives. - The authors can be contacted at <libzip@nih.at> + The authors can be contacted at <info@libzip.org> Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions @@ -17,7 +17,7 @@ 3. The names of the authors may not be used to endorse or promote products derived from this software without specific prior written permission. - + THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE @@ -31,237 +31,380 @@ IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "zipint.h" #include <stdlib.h> #include <string.h> -#include "zipint.h" + +/* parameter for the string hash function */ +#define HASH_MULTIPLIER 33 +#define HASH_START 5381 + +/* hash table's fill ratio is kept between these by doubling/halfing its size as necessary */ +#define HASH_MAX_FILL .75 +#define HASH_MIN_FILL .01 + +/* but hash table size is kept between these */ +#define HASH_MIN_SIZE 256 +#define HASH_MAX_SIZE 0x80000000ul struct zip_hash_entry { const zip_uint8_t *name; zip_int64_t orig_index; zip_int64_t current_index; struct zip_hash_entry *next; + zip_uint32_t hash_value; }; typedef struct zip_hash_entry zip_hash_entry_t; struct zip_hash { - zip_uint16_t table_size; + zip_uint32_t table_size; + zip_uint64_t nentries; zip_hash_entry_t **table; }; -zip_hash_t * -_zip_hash_new(zip_uint16_t table_size, zip_error_t *error) -{ - zip_hash_t *hash; - if (table_size == 0) { - zip_error_set(error, ZIP_ER_INTERNAL, 0); - return NULL; +/* free list of entries */ +static void +free_list(zip_hash_entry_t *entry) { + while (entry != NULL) { + zip_hash_entry_t *next = entry->next; + free(entry); + entry = next; } +} - if ((hash=(zip_hash_t *)malloc(sizeof(zip_hash_t))) == NULL) { - zip_error_set(error, ZIP_ER_MEMORY, 0); - return NULL; + +/* compute hash of string, full 32 bit value */ +static zip_uint32_t +hash_string(const zip_uint8_t *name) { + zip_uint64_t value = HASH_START; + + if (name == NULL) { + return 0; } - hash->table_size = table_size; - if ((hash->table=(zip_hash_entry_t**)calloc(table_size, sizeof(zip_hash_entry_t *))) == NULL) { - free(hash); - zip_error_set(error, ZIP_ER_MEMORY, 0); - return NULL; + + while (*name != 0) { + value = (zip_uint64_t)(((value * HASH_MULTIPLIER) + (zip_uint8_t)*name) % 0x100000000ul); + name++; } - return hash; + return (zip_uint32_t)value; } -static void -_free_list(zip_hash_entry_t *entry) -{ - zip_hash_entry_t *next; - do { - next = entry->next; - free(entry); - entry = next; - } while (entry != NULL); -} -void -_zip_hash_free(zip_hash_t *hash) -{ - zip_uint16_t i; +/* resize hash table; new_size must be a power of 2, can be larger or smaller than current size */ +static bool +hash_resize(zip_hash_t *hash, zip_uint32_t new_size, zip_error_t *error) { + zip_hash_entry_t **new_table; - if (hash == NULL) { - return; + if (new_size == hash->table_size) { + return true; + } + + if ((new_table = (zip_hash_entry_t **)calloc(new_size, sizeof(zip_hash_entry_t *))) == NULL) { + zip_error_set(error, ZIP_ER_MEMORY, 0); + return false; } - for (i=0; i<hash->table_size; i++) { - if (hash->table[i] != NULL) { - _free_list(hash->table[i]); - } + if (hash->nentries > 0) { + zip_uint32_t i; + + for (i = 0; i < hash->table_size; i++) { + zip_hash_entry_t *entry = hash->table[i]; + while (entry) { + zip_hash_entry_t *next = entry->next; + + zip_uint32_t new_index = entry->hash_value % new_size; + + entry->next = new_table[new_index]; + new_table[new_index] = entry; + + entry = next; + } + } } + free(hash->table); - free(hash); + hash->table = new_table; + hash->table_size = new_size; + + return true; } -static zip_uint16_t -_hash_string(const zip_uint8_t *name, zip_uint16_t size) -{ -#define HASH_MULTIPLIER 33 - zip_uint16_t value = 5381; - if (name == NULL) - return 0; +static zip_uint32_t +size_for_capacity(zip_uint64_t capacity) { + double needed_size = capacity / HASH_MAX_FILL; + zip_uint32_t v; - while (*name != 0) { - value = (zip_uint16_t)(((value * HASH_MULTIPLIER) + (zip_uint8_t)*name) % size); - name++; + if (needed_size > ZIP_UINT32_MAX) { + v = ZIP_UINT32_MAX; + } + else { + v = (zip_uint32_t)needed_size; + } + + if (v > HASH_MAX_SIZE) { + return HASH_MAX_SIZE; + } + + /* From Bit Twiddling Hacks by Sean Eron Anderson <seander@cs.stanford.edu> + (http://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2). */ + + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + + return v; +} + + +zip_hash_t * +_zip_hash_new(zip_error_t *error) { + zip_hash_t *hash; + + if ((hash = (zip_hash_t *)malloc(sizeof(zip_hash_t))) == NULL) { + zip_error_set(error, ZIP_ER_MEMORY, 0); + return NULL; } - return value; + hash->table_size = 0; + hash->nentries = 0; + hash->table = NULL; + + return hash; } + +void +_zip_hash_free(zip_hash_t *hash) { + zip_uint32_t i; + + if (hash == NULL) { + return; + } + + if (hash->table != NULL) { + for (i = 0; i < hash->table_size; i++) { + if (hash->table[i] != NULL) { + free_list(hash->table[i]); + } + } + free(hash->table); + } + free(hash); +} + + /* insert into hash, return error on existence or memory issues */ bool -_zip_hash_add(zip_hash_t *hash, const zip_uint8_t *name, zip_uint64_t index, zip_flags_t flags, zip_error_t *error) -{ - zip_uint16_t hash_value; +_zip_hash_add(zip_hash_t *hash, const zip_uint8_t *name, zip_uint64_t index, zip_flags_t flags, zip_error_t *error) { + zip_uint32_t hash_value, table_index; zip_hash_entry_t *entry; if (hash == NULL || name == NULL || index > ZIP_INT64_MAX) { - zip_error_set(error, ZIP_ER_INVAL, 0); - return false; + zip_error_set(error, ZIP_ER_INVAL, 0); + return false; } - hash_value = _hash_string(name, hash->table_size); - for (entry = hash->table[hash_value]; entry != NULL; entry = entry->next) { - if (strcmp((const char *)name, (const char *)entry->name) == 0) { - if (((flags & ZIP_FL_UNCHANGED) && entry->orig_index != -1) || entry->current_index != -1) { - zip_error_set(error, ZIP_ER_EXISTS, 0); - return false; - } - else { - break; - } - } + if (hash->table_size == 0) { + if (!hash_resize(hash, HASH_MIN_SIZE, error)) { + return false; + } + } + + hash_value = hash_string(name); + table_index = hash_value % hash->table_size; + + for (entry = hash->table[table_index]; entry != NULL; entry = entry->next) { + if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) { + if (((flags & ZIP_FL_UNCHANGED) && entry->orig_index != -1) || entry->current_index != -1) { + zip_error_set(error, ZIP_ER_EXISTS, 0); + return false; + } + else { + break; + } + } } if (entry == NULL) { - if ((entry=(zip_hash_entry_t *)malloc(sizeof(zip_hash_entry_t))) == NULL) { - zip_error_set(error, ZIP_ER_MEMORY, 0); - return false; - } - entry->name = name; - entry->next = hash->table[hash_value]; - hash->table[hash_value] = entry; - entry->orig_index = -1; + if ((entry = (zip_hash_entry_t *)malloc(sizeof(zip_hash_entry_t))) == NULL) { + zip_error_set(error, ZIP_ER_MEMORY, 0); + return false; + } + entry->name = name; + entry->next = hash->table[table_index]; + hash->table[table_index] = entry; + entry->hash_value = hash_value; + entry->orig_index = -1; + hash->nentries++; + if (hash->nentries > hash->table_size * HASH_MAX_FILL && hash->table_size < HASH_MAX_SIZE) { + if (!hash_resize(hash, hash->table_size * 2, error)) { + return false; + } + } } if (flags & ZIP_FL_UNCHANGED) { - entry->orig_index = (zip_int64_t)index; + entry->orig_index = (zip_int64_t)index; } entry->current_index = (zip_int64_t)index; return true; } + /* remove entry from hash, error if not found */ bool -_zip_hash_delete(zip_hash_t *hash, const zip_uint8_t *name, zip_error_t *error) -{ - zip_uint16_t hash_value; +_zip_hash_delete(zip_hash_t *hash, const zip_uint8_t *name, zip_error_t *error) { + zip_uint32_t hash_value, index; zip_hash_entry_t *entry, *previous; if (hash == NULL || name == NULL) { - zip_error_set(error, ZIP_ER_INVAL, 0); - return false; + zip_error_set(error, ZIP_ER_INVAL, 0); + return false; } - hash_value = _hash_string(name, hash->table_size); - previous = NULL; - entry = hash->table[hash_value]; - while (entry) { - if (strcmp((const char *)name, (const char *)entry->name) == 0) { - if (entry->orig_index == -1) { - if (previous) { - previous->next = entry->next; - } - else { - hash->table[hash_value] = entry->next; - } - free(entry); - } - else { - entry->current_index = -1; - } - return true; - } - previous = entry; - entry = entry->next; - }; + if (hash->nentries > 0) { + hash_value = hash_string(name); + index = hash_value % hash->table_size; + previous = NULL; + entry = hash->table[index]; + while (entry) { + if (entry->hash_value == hash_value && strcmp((const char *)name, (const char *)entry->name) == 0) { + if (entry->orig_index == -1) { + if (previous) { + previous->next = entry->next; + } + else { + hash->table[index] = entry->next; + } + free(entry); + hash->nentries--; + if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) { + if (!hash_resize(hash, hash->table_size / 2, error)) { + return false; + } + } + } + else { + entry->current_index = -1; + } + return true; + } + previous = entry; + entry = entry->next; + } + } zip_error_set(error, ZIP_ER_NOENT, 0); return false; } + /* find value for entry in hash, -1 if not found */ zip_int64_t -_zip_hash_lookup(zip_hash_t *hash, const zip_uint8_t *name, zip_flags_t flags, zip_error_t *error) -{ - zip_uint16_t hash_value; +_zip_hash_lookup(zip_hash_t *hash, const zip_uint8_t *name, zip_flags_t flags, zip_error_t *error) { + zip_uint32_t hash_value, index; zip_hash_entry_t *entry; if (hash == NULL || name == NULL) { - zip_error_set(error, ZIP_ER_INVAL, 0); - return -1; + zip_error_set(error, ZIP_ER_INVAL, 0); + return -1; } - hash_value = _hash_string(name, hash->table_size); - for (entry = hash->table[hash_value]; entry != NULL; entry = entry->next) { - if (strcmp((const char *)name, (const char *)entry->name) == 0) { - if (flags & ZIP_FL_UNCHANGED) { - if (entry->orig_index != -1) { - return entry->orig_index; - } - } - else { - if (entry->current_index != -1) { - return entry->current_index; - } - } - break; - } + if (hash->nentries > 0) { + hash_value = hash_string(name); + index = hash_value % hash->table_size; + for (entry = hash->table[index]; entry != NULL; entry = entry->next) { + if (strcmp((const char *)name, (const char *)entry->name) == 0) { + if (flags & ZIP_FL_UNCHANGED) { + if (entry->orig_index != -1) { + return entry->orig_index; + } + } + else { + if (entry->current_index != -1) { + return entry->current_index; + } + } + break; + } + } } zip_error_set(error, ZIP_ER_NOENT, 0); - return -1; + return -1; } -void -_zip_hash_revert(zip_hash_t *hash) -{ - zip_uint16_t i; + +bool +_zip_hash_reserve_capacity(zip_hash_t *hash, zip_uint64_t capacity, zip_error_t *error) { + zip_uint32_t new_size; + + if (capacity == 0) { + return true; + } + + new_size = size_for_capacity(capacity); + + if (new_size <= hash->table_size) { + return true; + } + + if (!hash_resize(hash, new_size, error)) { + return false; + } + + return true; +} + + +bool +_zip_hash_revert(zip_hash_t *hash, zip_error_t *error) { + zip_uint32_t i; zip_hash_entry_t *entry, *previous; - + for (i = 0; i < hash->table_size; i++) { - previous = NULL; - entry = hash->table[i]; - while (entry) { - if (entry->orig_index == -1) { - zip_hash_entry_t *p; - if (previous) { - previous->next = entry->next; - } - else { - hash->table[i] = entry->next; - } - p = entry; - entry = entry->next; - /* previous does not change */ - free(p); - } - else { - entry->current_index = entry->orig_index; - previous = entry; - entry = entry->next; - } - } + previous = NULL; + entry = hash->table[i]; + while (entry) { + if (entry->orig_index == -1) { + zip_hash_entry_t *p; + if (previous) { + previous->next = entry->next; + } + else { + hash->table[i] = entry->next; + } + p = entry; + entry = entry->next; + /* previous does not change */ + free(p); + hash->nentries--; + } + else { + entry->current_index = entry->orig_index; + previous = entry; + entry = entry->next; + } + } + } + + if (hash->nentries < hash->table_size * HASH_MIN_FILL && hash->table_size > HASH_MIN_SIZE) { + zip_uint32_t new_size = hash->table_size / 2; + while (hash->nentries < new_size * HASH_MIN_FILL && new_size > HASH_MIN_SIZE) { + new_size /= 2; + } + if (!hash_resize(hash, new_size, error)) { + return false; + } } + + return true; } |