Commit 0e9e35e3 authored by Thomas Haller's avatar Thomas Haller

all: refactor hashing by introducing NMHashState

The previous NM_HASH_* macros operated directly on a guint value
and were thus close to the actual implementation.

Replace them by adding a NMHashState struct and accessors to
update the hash state. This hides the implementation better
and would allow us to carry more state. For example, we could
switch to siphash24() transparently.

For now, we still do what is basically a form of djb2 hashing, albeit
with a differing start seed.

Also add nm_hash_str() and nm_str_hash():

- nm_hash_str() is our own string hashing implementation

- nm_str_hash() is our own string hashing implementation, but with a
  GHashFunc signature, suitable for passing to g_hash_table_new().
  Its name is chosen to remind you of g_str_hash(), which it
  replaces.
parent 281d2d9f
......@@ -4007,27 +4007,27 @@ _nm_utils_strstrdictkey_hash (gconstpointer a)
{
const NMUtilsStrStrDictKey *k = a;
const signed char *p;
guint32 h = NM_HASH_INIT (76642997u);
NMHashState h;
nm_hash_init (&h, 76642997u);
if (k) {
if (((int) k->type) & ~STRSTRDICTKEY_ALL_SET)
g_return_val_if_reached (0);
h = NM_HASH_COMBINE (h, k->type);
nm_hash_update_uint (&h, k->type);
if (k->type & STRSTRDICTKEY_ALL_SET) {
p = (void *) k->data;
for (; *p != '\0'; p++)
h = NM_HASH_COMBINE (h, *p);
nm_hash_update_uint (&h, *p);
if (k->type == STRSTRDICTKEY_ALL_SET) {
/* the key contains two strings. Continue... */
h = NM_HASH_COMBINE (h, '\0');
nm_hash_update_uint (&h, '\0');
for (p++; *p != '\0'; p++)
h = NM_HASH_COMBINE (h, *p);
nm_hash_update_uint (&h, *p);
}
}
}
return h;
return nm_hash_complete (&h);
}
gboolean
......
......@@ -26,6 +26,7 @@
#include <string.h>
#include "nm-utils/c-list-util.h"
#include "nm-utils/nm-hash-utils.h"
#include "nm-utils.h"
#include "nm-setting-private.h"
......@@ -78,6 +79,66 @@ G_STATIC_ASSERT (sizeof (bool) <= sizeof (int));
/*****************************************************************************/
/* Hash @str with a fixed test seed, once as a string and once as a raw
 * buffer of strlen(str) bytes, and assert that both ways agree.
 *
 * Returns the hash obtained from the string variant. */
static guint
_test_hash_str (const char *str)
{
	const guint SEED = 10;
	NMHashState state;
	size_t len = 0;
	guint v;
	guint v2;

	/* variant 1: feed the NUL-terminated string. */
	nm_hash_init (&state, SEED);
	nm_hash_update_str (&state, str);
	v = nm_hash_complete (&state);

	/* variant 2: feed the same bytes as a memory buffer. Hashing a
	 * string and hashing the equivalent buffer are expected to give
	 * the same result, which is a desirable property. */
	if (str)
		len = strlen (str);
	nm_hash_init (&state, SEED);
	nm_hash_update_mem (&state, str, len);
	v2 = nm_hash_complete (&state);

	g_assert (v == v2);
	return v;
}
/* Unit test for the NMHashState based hashing helpers.
 *
 * It checks that string and buffer hashing agree for a few sample
 * strings, that a zeroed state completes to the non-zero fallback
 * value, and it pins down known collisions of the current algorithm. */
static void
test_nm_hash (void)
{
NMHashState h;
/* each call asserts internally that hashing the string and hashing
 * the equivalent memory buffer yield the same value. */
_test_hash_str ("");
_test_hash_str ("a");
_test_hash_str ("aa");
_test_hash_str ("diceros bicornis longipes");
/* a state whose hash is zero must never complete to zero; instead
 * nm_hash_complete() returns its fixed fallback constant. */
memset (&h, 0, sizeof (h));
g_assert_cmpint (nm_hash_complete (&h), ==, 1396707757u);
/* Note how these two different strings always hash to the same
 * value, even though the hash is mixed with a global seed.
 *
 * Ideally, whether two strings collide would depend on the seed
 * value, which currently it does not. */
g_assert_cmpint (nm_hash_str ("BA"), ==, nm_hash_str ("Ab"));
/* With the current hashing algorithm, once we know two words that hash
 * the same, we can trivially find more collisions by concatenating
 * them (which is bad). */
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("AbAbAb"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("AbAbBA"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("AbBAAb"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("AbBABA"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("BAAbAb"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("BAAbBA"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("BABAAb"));
g_assert_cmpint (nm_hash_str ("BABABA"), ==, nm_hash_str ("BABABA"));
}
/*****************************************************************************/
static void
test_nm_g_slice_free_fcn (void)
{
......@@ -6341,6 +6402,7 @@ int main (int argc, char **argv)
{
nmtst_init (&argc, &argv, TRUE);
g_test_add_func ("/core/general/test_nm_hash", test_nm_hash);
g_test_add_func ("/core/general/test_nm_g_slice_free_fcn", test_nm_g_slice_free_fcn);
g_test_add_func ("/core/general/test_c_list_sort", test_c_list_sort);
g_test_add_func ("/core/general/test_dedup_multi", test_dedup_multi);
......
......@@ -176,21 +176,21 @@ _dict_idx_entries_hash (const NMDedupMultiEntry *entry)
const NMDedupMultiIdxType *idx_type;
const NMDedupMultiObj *obj;
gboolean lookup_head;
guint h;
NMHashState h;
_entry_unpack (entry, &idx_type, &obj, &lookup_head);
nm_hash_init (&h, 1914869417u);
if (idx_type->klass->idx_obj_partition_hash) {
nm_assert (obj);
h = idx_type->klass->idx_obj_partition_hash (idx_type, obj);
} else
h = NM_HASH_INIT (1914869417u);
nm_hash_update_uint (&h, idx_type->klass->idx_obj_partition_hash (idx_type, obj));
}
if (!lookup_head)
h = NM_HASH_COMBINE (h, idx_type->klass->idx_obj_id_hash (idx_type, obj));
nm_hash_update_uint (&h, idx_type->klass->idx_obj_id_hash (idx_type, obj));
h = NM_HASH_COMBINE (h, GPOINTER_TO_UINT (idx_type));
return h;
nm_hash_update_ptr (&h, idx_type);
return nm_hash_complete (&h);
}
static gboolean
......
......@@ -28,12 +28,14 @@
/*****************************************************************************/
guint
NM_HASH_INIT (guint seed)
void
nm_hash_init (NMHashState *state, guint static_seed)
{
static volatile guint global_seed = 0;
guint g, s;
nm_assert (state);
/* we xor @seed with a random @global_seed. This is to make the hashing behavior
* less predictable and harder to exploit collisions. */
g = global_seed;
......@@ -46,5 +48,28 @@ NM_HASH_INIT (guint seed)
nm_assert (g);
}
return g ^ seed;
s = g ^ static_seed;
state->hash = s;
}
/* Hash the NUL-terminated string @str in one step.
 *
 * This initializes a fresh hash state with a seed specific to this
 * function, feeds @str via nm_hash_update_str() and returns the
 * completed hash value. */
guint
nm_hash_str (const char *str)
{
	NMHashState state;
	guint result;

	nm_hash_init (&state, 1867854211u);
	nm_hash_update_str (&state, str);
	result = nm_hash_complete (&state);

	return result;
}
/* Same as nm_hash_str(), but taking a gconstpointer argument so that
 * the signature fits a hash-table hash callback. */
guint
nm_str_hash (gconstpointer str)
{
	const char *s = str;

	return nm_hash_str (s);
}
/* Out-of-line wrapper around nm_hash_ptr(): hashes the pointer value
 * @ptr itself, not whatever it points to. */
guint
nm_direct_hash (gconstpointer ptr)
{
	const guint hash = nm_hash_ptr (ptr);

	return hash;
}
......@@ -22,26 +22,107 @@
#ifndef __NM_HASH_UTILS_H__
#define __NM_HASH_UTILS_H__
guint NM_HASH_INIT (guint seed);
#include <stdint.h>
/* State of an ongoing hash computation.
 *
 * Set it up with nm_hash_init(), feed data with the nm_hash_update_*()
 * functions and obtain the final value with nm_hash_complete(). */
typedef struct {
/* the running hash value accumulated so far. */
guint hash;
} NMHashState;
void nm_hash_init (NMHashState *state, guint static_seed);
static inline guint
NM_HASH_COMBINE (guint h, guint val)
nm_hash_complete (NMHashState *state)
{
/* see g_str_hash() for reasons */
return (h << 5) + h + val;
nm_assert (state);
/* we don't ever want to return a zero hash.
*
* NMPObject requires that in _idx_obj_part(), and it's just a good idea. */
return state->hash ?: 1396707757u;
}
static inline guint
NM_HASH_COMBINE_UINT64 (guint h, guint64 val)
static inline void
nm_hash_update_uint (NMHashState *state, guint val)
{
guint h;
nm_assert (state);
h = state->hash;
h = (h << 5) + h + val;
state->hash = h;
}
/* Mix the 64 bit value @val into @state.
 *
 * The value is split into two guint halves which are folded into the
 * running hash one after the other, the low half first. */
static inline void
nm_hash_update_uint64 (NMHashState *state, guint64 val)
{
	const guint lo = (guint) val;
	const guint hi = (guint) (val >> 32);
	guint acc;

	nm_assert (state);

	/* acc * 33 is the same as (acc << 5) + acc in unsigned arithmetic. */
	acc = state->hash;
	acc = acc * 33u + lo;
	acc = acc * 33u + hi;
	state->hash = acc;
}
/* Mix the pointer value @ptr (the address, not the pointed-to data)
 * into @state.
 *
 * If a pointer is wider than a guint, the address is hashed as a
 * 64 bit value; otherwise it is hashed as a plain guint. */
static inline void
nm_hash_update_ptr (NMHashState *state, gconstpointer ptr)
{
	const uintptr_t addr = (uintptr_t) ptr;

	if (sizeof (ptr) > sizeof (guint)) {
		nm_hash_update_uint64 (state, (guint64) addr);
		return;
	}

	nm_hash_update_uint (state, (guint) addr);
}
/* Mix the @n bytes starting at @ptr into @state.
 *
 * The bytes are folded in one at a time, followed by a fixed trailing
 * constant. nm_hash_update_str() appends the same constant after a
 * non-NULL string, so nm_hash_update_str(&h, s) gives the same result
 * as nm_hash_update_mem(&h, s, strlen(s)).
 *
 * With @n of zero, @ptr is never dereferenced. */
static inline void
nm_hash_update_mem (NMHashState *state, const void *ptr, gsize n)
{
	const guint8 *bytes = ptr;
	guint acc;

	nm_assert (state);

	acc = state->hash;
	while (n > 0) {
		/* acc * 33 is the same as (acc << 5) + acc in unsigned arithmetic. */
		acc = acc * 33u + (guint) *bytes;
		bytes++;
		n--;
	}
	acc = acc * 33u + 1774132687u;
	state->hash = acc;
}
static inline void
nm_hash_update_str (NMHashState *state, const char *str)
{
return NM_HASH_COMBINE (h, (((guint) val) & 0xFFFFFFFFu) + ((guint) (val >> 32)));
const guint8 *p = (const guint8 *) str;
guint8 c;
guint h;
nm_assert (state);
/* Note that NULL hashes differently from "". */
h = state->hash;
if (str) {
while ((c = *p++))
h = (h << 5) + h + ((guint) c);
h = (h << 5) + h + 1774132687u;
} else
h = (h << 5) + h + 2967906233u;
state->hash = h;
}
static inline guint
NM_HASH_POINTER (gconstpointer ptr)
nm_hash_ptr (gconstpointer ptr)
{
/* same as g_direct_hash(), but inline. */
return GPOINTER_TO_UINT (ptr);
if (sizeof (ptr) <= sizeof (guint))
return (guint) ((uintptr_t) ptr);
else
return ((guint) (((uintptr_t) ptr) >> 32)) ^ ((guint) ((uintptr_t) ptr));
}
guint nm_direct_hash (gconstpointer str);
guint nm_hash_str (const char *str);
guint nm_str_hash (gconstpointer str);
#endif /* __NM_HASH_UTILS_H__ */
......@@ -2858,11 +2858,12 @@ typedef struct {
static guint
_v4_has_shadowed_routes_detect_hash (const IP4RPFilterData *d)
{
guint h = NM_HASH_INIT (1105201169u);
NMHashState h;
h = NM_HASH_COMBINE (h, d->network);
h = NM_HASH_COMBINE (h, d->plen);
return h;
nm_hash_init (&h, 1105201169u);
nm_hash_update_uint (&h, d->network);
nm_hash_update_uint (&h, d->plen);
return nm_hash_complete (&h);
}
static gboolean
......
......@@ -274,13 +274,14 @@ static guint
lldp_neighbor_id_hash (gconstpointer ptr)
{
const LldpNeighbor *neigh = ptr;
guint hash = NM_HASH_INIT (23423423u);
hash = NM_HASH_COMBINE (hash, neigh->chassis_id ? g_str_hash (neigh->chassis_id) : 12321u);
hash = NM_HASH_COMBINE (hash, neigh->port_id ? g_str_hash (neigh->port_id) : 34342343u);
hash = NM_HASH_COMBINE (hash, neigh->chassis_id_type);
hash = NM_HASH_COMBINE (hash, neigh->port_id_type);
return hash;
NMHashState h;
nm_hash_init (&h, 23423423u);
nm_hash_update_str (&h, neigh->chassis_id);
nm_hash_update_str (&h, neigh->port_id);
nm_hash_update_uint (&h, neigh->chassis_id_type);
nm_hash_update_uint (&h, neigh->port_id_type);
return nm_hash_complete (&h);
}
static int
......
......@@ -189,12 +189,11 @@ nm_utils_exp10 (gint16 ex)
guint
nm_utils_in6_addr_hash (const struct in6_addr *addr)
{
guint hash = NM_HASH_INIT (3675559913u);
int i;
NMHashState h;
for (i = 0; i < sizeof (*addr); i++)
hash = NM_HASH_COMBINE (hash, ((const guint8 *) addr)[i]);
return hash;
nm_hash_init (&h, 3675559913u);
nm_hash_update_in6addr (&h, addr);
return nm_hash_complete (&h);
}
/*****************************************************************************/
......
......@@ -128,24 +128,23 @@ nm_utils_ip6_address_same_prefix (const struct in6_addr *addr_a, const struct in
#define NM_CMP_DIRECT_IN6ADDR_SAME_PREFIX(a, b, plen) \
NM_CMP_RETURN (nm_utils_ip6_address_same_prefix_cmp ((a), (b), (plen)))
static inline guint
NM_HASH_COMBINE_IN6ADDR (guint h, const struct in6_addr *addr)
static inline void
nm_hash_update_in6addr (NMHashState *h, const struct in6_addr *addr)
{
if (!addr)
g_return_val_if_reached (h);
return NM_HASH_COMBINE (h, nm_utils_in6_addr_hash (addr));
nm_hash_update_mem (h, addr, addr ? sizeof (*addr) : 0);
}
static inline guint
NM_HASH_COMBINE_IN6ADDR_PREFIX (guint h, const struct in6_addr *addr, guint8 plen)
static inline void
nm_hash_update_in6addr_prefix (NMHashState *h, const struct in6_addr *addr, guint8 plen)
{
struct in6_addr a;
if (!addr)
g_return_val_if_reached (h);
g_return_if_reached ();
nm_utils_ip6_address_clear_host_address (&a, addr, plen);
/* we don't hash plen itself. The caller may want to do that.*/
return NM_HASH_COMBINE (h, nm_utils_in6_addr_hash (&a));
nm_hash_update_in6addr (h, &a);
}
double nm_utils_exp10 (gint16 e);
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment