lib: os: add hash function specification and implementation

Hash functions are necessary for Hashmaps (a.k.a. Hash Tables).

* Create a flexible hash function interface
* Implementation 1. Murmur3
* Implementation 2. djb2
* Kconfig option for system-wide 32-bit hash

Signed-off-by: Chris Friedt <cfriedt@meta.com>
This commit is contained in:
Chris Friedt 2022-12-30 14:13:48 -05:00 committed by Carles Cufí
parent edb5ee1575
commit b75df60f0b
6 changed files with 283 additions and 0 deletions

View File

@ -0,0 +1,136 @@
/*
* Copyright (c) 2022 Meta
*
* SPDX-License-Identifier: Apache-2.0
*/
#ifndef ZEPHYR_INCLUDE_SYS_HASH_FUNCTION_H_
#define ZEPHYR_INCLUDE_SYS_HASH_FUNCTION_H_
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <zephyr/sys/__assert.h>
#include <zephyr/sys/util_macro.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
 * @brief 32-bit Hash function interface
 *
 * Hash functions are used to map data from an arbitrarily large space to a
 * (typically smaller) fixed-size space. For a given input, a hash function will
 * consistently generate the same, semi-unique numerical value. Even for
 * marginally different data, a good hash function will distribute the entropy
 * almost evenly over all bits in the hashed value when combined with modulo
 * arithmetic over a finite-sized numeric field.
 *
 * This is the common signature shared by the hash functions declared in this
 * header (sys_hash32_identity(), sys_hash32_djb2(), sys_hash32_murmur3() and
 * sys_hash32()). The input is described by a pointer and an explicit byte
 * count.
 *
 * @param str pointer to the input data to hash
 * @param n the number of bytes of input data at @p str
 *
 * @return the 32-bit numeric hash associated with the @p n bytes at @p str
 */
typedef uint32_t (*sys_hash_func32_t)(const void *str, size_t n);
/**
 * @brief The naive identity hash function
 *
 * The input is interpreted as a single native-endian scalar whose value is
 * returned as the hash. @p n must therefore equal the size of a primitive
 * type of 1, 2, 4 or 8 bytes, such as `[u]int8_t`, `[u]int16_t`,
 * `[u]int32_t`, `[u]int64_t`, `float`, `double`, or `void *`.
 *
 * The bytes are copied into a correctly typed temporary before being read,
 * so @p str may point to an object of any type and need not be aligned for
 * the corresponding integer type.
 *
 * For 8-byte inputs the 64-bit value is converted to `uint32_t`, i.e. only
 * its low 32 bits contribute to the hash.
 *
 * @note The identity hash function is used for testing @ref sys_hashmap.
 *
 * @param str pointer to the scalar to hash
 * @param n the number of bytes in @p str (1, 2, 4 or 8)
 *
 * @return the numeric hash associated with @p str; any other @p n triggers
 *         an assertion and, if execution continues past it, yields 0
 */
static inline uint32_t sys_hash32_identity(const void *str, size_t n)
{
	switch (n) {
	case sizeof(uint8_t): {
		uint8_t v8;

		memcpy(&v8, str, sizeof(v8));
		return v8;
	}
	case sizeof(uint16_t): {
		uint16_t v16;

		memcpy(&v16, str, sizeof(v16));
		return v16;
	}
	case sizeof(uint32_t): {
		uint32_t v32;

		memcpy(&v32, str, sizeof(v32));
		return v32;
	}
	case sizeof(uint64_t): {
		uint64_t v64;

		memcpy(&v64, str, sizeof(v64));
		/* Only the low 32 bits of the value fit in the result. */
		return (uint32_t)v64;
	}
	default:
		break;
	}

	__ASSERT(false, "invalid str length %zu", n);

	return 0;
}
/**
 * @brief Daniel J. Bernstein's hash function
 *
 * The hash starts from the constant 5381 and, for every input byte, is
 * multiplied by 33 and then combined with that byte.
 *
 * Some notes:
 * - normally, this hash function is used on NUL-terminated strings
 * - it has been modified to support arbitrary sequences of bytes
 * - it has been modified to use XOR rather than addition
 * - it is not a cryptographically strong hash
 *
 * @param str pointer to the input data to hash
 * @param n the number of bytes in @p str
 *
 * @return the numeric hash associated with @p str
 *
 * @note enable with @kconfig{CONFIG_SYS_HASH_FUNC32_DJB2}
 *
 * @see https://theartincode.stanis.me/008-djb2/
 */
uint32_t sys_hash32_djb2(const void *str, size_t n);
/**
 * @brief Murmur3 hash function
 *
 * Computes a 32-bit Murmur3 hash over @p n bytes using a fixed seed of 0.
 *
 * @param str pointer to the input data to hash
 * @param n the number of bytes in @p str
 *
 * @return the numeric hash associated with @p str
 *
 * @note enable with @kconfig{CONFIG_SYS_HASH_FUNC32_MURMUR3}
 *
 * @see https://en.wikipedia.org/wiki/MurmurHash
 */
uint32_t sys_hash32_murmur3(const void *str, size_t n);
/**
 * @brief System default 32-bit hash function
 *
 * Forwards to the hash implementation selected at build time. The options
 * are checked in this order: identity, djb2, Murmur3. If none of them is
 * enabled an assertion is raised and 0 is returned.
 *
 * @param str pointer to the input data to hash
 * @param n the number of bytes in @p str
 *
 * @return the numeric hash associated with @p str
 */
static inline uint32_t sys_hash32(const void *str, size_t n)
{
	uint32_t hash = 0;

	if (IS_ENABLED(CONFIG_SYS_HASH_FUNC32_CHOICE_IDENTITY)) {
		hash = sys_hash32_identity(str, n);
	} else if (IS_ENABLED(CONFIG_SYS_HASH_FUNC32_CHOICE_DJB2)) {
		hash = sys_hash32_djb2(str, n);
	} else if (IS_ENABLED(CONFIG_SYS_HASH_FUNC32_CHOICE_MURMUR3)) {
		hash = sys_hash32_murmur3(str, n);
	} else {
		/* No default was selected: flag it and fall back to 0. */
		__ASSERT(0, "No default 32-bit hash. See CONFIG_SYS_HASH_FUNC32_CHOICE");
	}

	return hash;
}
#ifdef __cplusplus
}
#endif
#endif /* ZEPHYR_INCLUDE_SYS_HASH_FUNCTION_H_ */

View File

@ -64,6 +64,9 @@ zephyr_sources_ifdef(CONFIG_SYS_MEM_BLOCKS mem_blocks.c)
zephyr_sources_ifdef(CONFIG_WINSTREAM winstream.c)
zephyr_sources_ifdef(CONFIG_SYS_HASH_FUNC32_DJB2 hash_func32_djb2.c)
zephyr_sources_ifdef(CONFIG_SYS_HASH_FUNC32_MURMUR3 hash_func32_murmur3.c)
zephyr_library_include_directories(
${ZEPHYR_BASE}/kernel/include
${ZEPHYR_BASE}/arch/${ARCH}/include

View File

@ -176,4 +176,6 @@ rsource "Kconfig.cbprintf"
rsource "Kconfig.heap"
rsource "Kconfig.hash_func"
endmenu

46
lib/os/Kconfig.hash_func Normal file
View File

@ -0,0 +1,46 @@
# Copyright (c) 2022 Meta
#
# SPDX-License-Identifier: Apache-2.0
menu "Hash Function Support"
config SYS_HASH_FUNC32
bool "Hash function support"
help
Enable this option to support hash functions.
if SYS_HASH_FUNC32
config SYS_HASH_FUNC32_DJB2
bool "Daniel J. Bernstein's hash function (djb2)"
config SYS_HASH_FUNC32_MURMUR3
bool "Murmur3 hash function"
choice SYS_HASH_FUNC32_CHOICE
prompt "Default system-wide 32-bit hash function"
default SYS_HASH_FUNC32_CHOICE_MURMUR3
help
The default system-wide 32-bit hash function is sys_hash32().
config SYS_HASH_FUNC32_CHOICE_DJB2
bool "Default 32-bit hash is djb2"
select SYS_HASH_FUNC32_DJB2
config SYS_HASH_FUNC32_CHOICE_MURMUR3
bool "Default 32-bit hash is Murmur3"
select SYS_HASH_FUNC32_MURMUR3
config SYS_HASH_FUNC32_CHOICE_IDENTITY
bool "Default 32-bit hash is the identity"
help
This is the naive identity hash function. It only works for strings
either 1, 2, 4, or 8 bytes in length and so is suitable for scalar
values such as keys in a Hashmap. It is implemented as a static
inline function.
endchoice # SYS_HASH_FUNC32_CHOICE
endif # SYS_HASH_FUNC32
endmenu

48
lib/os/hash_func32_djb2.c Normal file
View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 1991, Daniel J. Bernstein
*
* SPDX-License-Identifier: Apache-2.0
*/
/*
* This software is in the PD as of approximately 2007. The story
* behind it (and other DJB software) is quite amazing. Thanks Dan!!
*
* Many distributors have relicensed it under e.g. BSD, MIT, and others.
*
* It is not clear what the original license name is or how to declare it
* using SPDX terms, since it explicitly has no license. I think it makes
* sense to use the default Zephyr licensing.
*
* Note: this is not a cryptographically strong hash algorithm.
*
* For details, please see
* https://cr.yp.to/rights.html
* https://cr.yp.to/distributors.html
* http://thedjbway.b0llix.net/license_free.html
*
* https://groups.google.com/g/comp.lang.c/c/lSKWXiuNOAk
* https://theartincode.stanis.me/008-djb2/
* http://www.cse.yorku.ca/~oz/hash.html
* https://bit.ly/3IxUVvC
*/
#include <stddef.h>
#include <stdint.h>
#include <zephyr/sys/hash_function.h>
/*
 * djb2 over an arbitrary byte sequence: start from 5381 and, for each byte,
 * multiply the running value by 33 and XOR the byte in. Arithmetic is
 * unsigned 32-bit, so overflow simply wraps.
 */
uint32_t sys_hash32_djb2(const void *str, size_t n)
{
	const uint8_t *bytes = str;
	/* 5381 is the djb2 starting value */
	uint32_t acc = 5381;

	for (size_t i = 0; i < n; ++i) {
		/* 33 == 2^5 + 1, the djb2 multiplier */
		acc = (acc * 33u) ^ bytes[i];
	}

	return acc;
}

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2022, Meta
*
* SPDX-License-Identifier: Apache-2.0
*/
#include <stddef.h>
#include <stdint.h>
/* Murmur3 per-word mixing step: multiply, rotate left by 15, multiply. */
static inline uint32_t murmur_32_scramble(uint32_t k)
{
	k *= 0xcc9e2d51;
	k = (k << 15) | (k >> 17);
	k *= 0x1b873593;
	return k;
}

/*
 * Read a 32-bit word in host byte order from an address that may be
 * unaligned. Copying through unsigned char keeps the access well-defined for
 * any input pointer.
 */
static inline uint32_t murmur_32_load(const uint8_t *p)
{
	uint32_t k;
	unsigned char *dst = (unsigned char *)&k;

	for (size_t i = 0; i < sizeof(k); ++i) {
		dst[i] = p[i];
	}

	return k;
}

/*
 * 32-bit Murmur3 hash of the n bytes at str, using a seed of 0.
 *
 * The input is consumed as whole 32-bit words in host byte order, followed by
 * up to three trailing bytes. Every byte is treated as an unsigned value, so
 * the result does not depend on whether plain char is signed. str may have
 * any alignment.
 *
 * Trailing bytes are folded in most-significant-byte first.
 * NOTE(review): the reference MurmurHash3_x86_32 appears to fold the tail in
 * the opposite order, so inputs with a 2- or 3-byte tail presumably do not
 * match reference test vectors - confirm if interoperability matters.
 *
 * Returns the 32-bit hash; an empty input (n == 0) hashes to 0.
 */
uint32_t sys_hash32_murmur3(const void *str, size_t n)
{
	const uint8_t *s = str;
	const size_t len = n;
	/* seed of 0 */
	uint32_t h = 0;
	uint32_t k;

	/* Body: mix in one full 32-bit word at a time. */
	for (; n >= sizeof(uint32_t); n -= sizeof(uint32_t), s += sizeof(uint32_t)) {
		k = murmur_32_load(s);
		h ^= murmur_32_scramble(k);
		h = (h << 13) | (h >> 19);
		h = h * 5 + 0xe6546b64;
	}

	/* Tail: gather the remaining 0..3 bytes into a single word. */
	for (k = 0; n != 0; --n, ++s) {
		k <<= 8;
		k |= *s;
	}

	h ^= murmur_32_scramble(k);

	/* Finalization: fold in the length, then avalanche the bits. */
	h ^= len;
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;

	return h;
}